├── .gitignore
├── LICENSE
├── README.md
├── ThirdPartyNotices.txt
├── data
│   └── ILSVRC2015
│       └── ImageSets
│           ├── DET_train_30classes.txt
│           ├── VID_train_15frames.txt
│           ├── VID_val_frames.txt
│           └── VID_val_videos.txt
├── demo
│   └── ILSVRC2015_val_00007010
│       ├── 000000.JPEG
│       ├── 000001.JPEG
│       ├── 000002.JPEG
│       ├── 000003.JPEG
│       ├── 000004.JPEG
│       ├── 000005.JPEG
│       ├── 000006.JPEG
│       ├── 000007.JPEG
│       ├── 000008.JPEG
│       ├── 000009.JPEG
│       ├── 000010.JPEG
│       ├── 000011.JPEG
│       ├── 000012.JPEG
│       ├── 000013.JPEG
│       ├── 000014.JPEG
│       ├── 000015.JPEG
│       ├── 000016.JPEG
│       ├── 000017.JPEG
│       ├── 000018.JPEG
│       ├── 000019.JPEG
│       ├── 000020.JPEG
│       ├── 000021.JPEG
│       ├── 000022.JPEG
│       ├── 000023.JPEG
│       ├── 000024.JPEG
│       ├── 000025.JPEG
│       ├── 000026.JPEG
│       ├── 000027.JPEG
│       ├── 000028.JPEG
│       ├── 000029.JPEG
│       ├── 000030.JPEG
│       ├── 000031.JPEG
│       ├── 000032.JPEG
│       ├── 000033.JPEG
│       ├── 000034.JPEG
│       ├── 000035.JPEG
│       ├── 000036.JPEG
│       ├── 000037.JPEG
│       ├── 000038.JPEG
│       ├── 000039.JPEG
│       ├── 000040.JPEG
│       ├── 000041.JPEG
│       ├── 000042.JPEG
│       ├── 000043.JPEG
│       ├── 000044.JPEG
│       ├── 000045.JPEG
│       ├── 000046.JPEG
│       ├── 000047.JPEG
│       ├── 000048.JPEG
│       ├── 000049.JPEG
│       ├── 000050.JPEG
│       ├── 000051.JPEG
│       ├── 000052.JPEG
│       ├── 000053.JPEG
│       ├── 000054.JPEG
│       ├── 000055.JPEG
│       ├── 000056.JPEG
│       ├── 000057.JPEG
│       ├── 000058.JPEG
│       ├── 000059.JPEG
│       ├── 000060.JPEG
│       ├── 000061.JPEG
│       ├── 000062.JPEG
│       ├── 000063.JPEG
│       ├── 000064.JPEG
│       ├── 000065.JPEG
│       ├── 000066.JPEG
│       ├── 000067.JPEG
│       ├── 000068.JPEG
│       ├── 000069.JPEG
│       ├── 000070.JPEG
│       ├── 000071.JPEG
│       ├── 000072.JPEG
│       ├── 000073.JPEG
│       ├── 000074.JPEG
│       ├── 000075.JPEG
│       ├── 000076.JPEG
│       ├── 000077.JPEG
│       ├── 000078.JPEG
│       ├── 000079.JPEG
│       ├── 000080.JPEG
│       ├── 000081.JPEG
│       ├── 000082.JPEG
│       ├── 000083.JPEG
│       ├── 000084.JPEG
│       ├── 000085.JPEG
│       ├── 000086.JPEG
│       ├── 000087.JPEG
│       ├── 000088.JPEG
│       ├── 000089.JPEG
│       ├── 000090.JPEG
│       ├── 000091.JPEG
│       ├── 000092.JPEG
│       ├── 000093.JPEG
│       ├── 000094.JPEG
│       ├── 000095.JPEG
│       ├── 000096.JPEG
│       ├── 000097.JPEG
│       ├── 000098.JPEG
│       ├── 000099.JPEG
│       ├── 000100.JPEG
│       ├── 000101.JPEG
│       ├── 000102.JPEG
│       ├── 000103.JPEG
│       ├── 000104.JPEG
│       ├── 000105.JPEG
│       ├── 000106.JPEG
│       ├── 000107.JPEG
│       ├── 000108.JPEG
│       ├── 000109.JPEG
│       ├── 000110.JPEG
│       ├── 000111.JPEG
│       ├── 000112.JPEG
│       ├── 000113.JPEG
│       ├── 000114.JPEG
│       ├── 000115.JPEG
│       ├── 000116.JPEG
│       ├── 000117.JPEG
│       ├── 000118.JPEG
│       ├── 000119.JPEG
│       ├── 000120.JPEG
│       ├── 000121.JPEG
│       ├── 000122.JPEG
│       ├── 000123.JPEG
│       ├── 000124.JPEG
│       ├── 000125.JPEG
│       ├── 000126.JPEG
│       ├── 000127.JPEG
│       ├── 000128.JPEG
│       ├── 000129.JPEG
│       ├── 000130.JPEG
│       ├── 000131.JPEG
│       ├── 000132.JPEG
│       ├── 000133.JPEG
│       ├── 000134.JPEG
│       ├── 000135.JPEG
│       ├── 000136.JPEG
│       ├── 000137.JPEG
│       ├── 000138.JPEG
│       ├── 000139.JPEG
│       ├── 000140.JPEG
│       ├── 000141.JPEG
│       ├── 000142.JPEG
│       └── 000143.JPEG
├── dff_rfcn
│   ├── __init__.py
│   ├── _init_paths.py
│   ├── config
│   │   ├── __init__.py
│   │   └── config.py
│   ├── core
│   │   ├── DataParallelExecutorGroup.py
│   │   ├── __init__.py
│   │   ├── callback.py
│   │   ├── loader.py
│   │   ├── metric.py
│   │   ├── module.py
│   │   ├── rcnn.py
│   │   └── tester.py
│   ├── demo.py
│   ├── demo_batch.py
│   ├── function
│   │   ├── __init__.py
│   │   ├── test_rcnn.py
│   │   ├── test_rpn.py
│   │   ├── train_rcnn.py
│   │   └── train_rpn.py
│   ├── operator_cxx
│   │   ├── multi_proposal-inl.h
│   │   ├── multi_proposal.cc
│   │   ├── multi_proposal.cu
│   │   ├── psroi_pooling-inl.h
│   │   ├── psroi_pooling.cc
│   │   └── psroi_pooling.cu
│   ├── operator_py
│   │   ├── __init__.py
│   │   ├── box_annotator_ohem.py
│   │   ├── proposal.py
│   │   ├── proposal_target.py
│   │   ├── rpn_inv_normalize.py
│   │   └── tile_as.py
│   ├── symbols
│   │   ├── __init__.py
│   │   └── resnet_v1_101_flownet_rfcn.py
│   ├── test.py
│   └── train_end2end.py
├── experiments
│   ├── dff_rfcn
│   │   ├── cfgs
│   │   │   ├── dff_rfcn_vid_demo.yaml
│   │   │   └── resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem.yaml
│   │   ├── dff_rfcn_end2end_train_test.py
│   │   └── dff_rfcn_test.py
│   └── rfcn
│       ├── cfgs
│       │   ├── resnet_v1_101_imagenet_vid_rfcn_end2end_ohem.yaml
│       │   └── rfcn_vid_demo.yaml
│       ├── rfcn_end2end_train_test.py
│       └── rfcn_test.py
├── init.bat
├── init.sh
├── lib
│   ├── Makefile
│   ├── __init__.py
│   ├── bbox
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── bbox.pyx
│   │   ├── bbox_regression.py
│   │   ├── bbox_transform.py
│   │   ├── setup_linux.py
│   │   └── setup_windows.py
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── ds_utils.py
│   │   ├── imagenet_vid.py
│   │   ├── imagenet_vid_eval.py
│   │   └── imdb.py
│   ├── nms
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.cu
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms.py
│   │   ├── nms_kernel.cu
│   │   ├── setup_linux.py
│   │   ├── setup_windows.py
│   │   └── setup_windows_cuda.py
│   ├── rpn
│   │   ├── __init__.py
│   │   ├── generate_anchor.py
│   │   └── rpn.py
│   └── utils
│       ├── PrefetchingIter.py
│       ├── __init__.py
│       ├── combine_model.py
│       ├── create_logger.py
│       ├── image.py
│       ├── image_processing.py
│       ├── load_data.py
│       ├── load_model.py
│       ├── lr_scheduler.py
│       ├── roidb.py
│       ├── save_model.py
│       ├── show_boxes.py
│       ├── symbol.py
│       └── tictoc.py
└── rfcn
    ├── __init__.py
    ├── _init_paths.py
    ├── config
    │   ├── __init__.py
    │   └── config.py
    ├── core
    │   ├── DataParallelExecutorGroup.py
    │   ├── __init__.py
    │   ├── callback.py
    │   ├── loader.py
    │   ├── metric.py
    │   ├── module.py
    │   ├── rcnn.py
    │   └── tester.py
    ├── demo.py
    ├── demo_batch.py
    ├── function
    │   ├── __init__.py
    │   ├── test_rcnn.py
    │   ├── test_rpn.py
    │   ├── train_rcnn.py
    │   └── train_rpn.py
    ├── operator_cxx
    │   ├── multi_proposal-inl.h
    │   ├── multi_proposal.cc
    │   ├── multi_proposal.cu
    │   ├── psroi_pooling-inl.h
    │   ├── psroi_pooling.cc
    │   └── psroi_pooling.cu
    ├── operator_py
    │   ├── __init__.py
    │   ├── box_annotator_ohem.py
    │   ├── proposal.py
    │   ├── proposal_target.py
    │   └── rpn_inv_normalize.py
    ├── symbols
    │   ├── __init__.py
    │   └── resnet_v1_101_rfcn.py
    ├── test.py
    └── train_end2end.py

/.gitignore:
--------------------------------------------------------------------------------
# IntelliJ project files
.idea
*.iml
out
gen

### Vim template
[._]*.s[a-w][a-z]
[._]s[a-w][a-z]
*.un~
Session.vim
.netrwhist
*~

### IPythonNotebook template
# Temporary data
.ipynb_checkpoints/

### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
#lib/
#lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

*.ipynb
*.params
*.json
.vscode/

lib/dataset/pycocotools/*.c
lib/dataset/pycocotools/*.cpp
lib/nms/*.c
lib/nms/*.cpp

external
output
model

.db

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2017 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/ThirdPartyNotices.txt:
--------------------------------------------------------------------------------
Deep Feature Flow

THIRD-PARTY SOFTWARE NOTICES AND INFORMATION

This project incorporates components from the projects listed below. The original copyright notices and the licenses under which Microsoft received such components are set forth below. Microsoft reserves all rights not expressly granted herein, whether by implication, estoppel or otherwise.

1. MXNet (https://github.com/apache/incubator-mxnet)
2. Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
3. Faster R-CNN (https://github.com/rbgirshick/py-faster-rcnn)
4. MS COCO API (https://github.com/cocodataset/cocoapi)


MXNet

Copyright (c) 2015-2016 by Contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


Fast R-CNN

Copyright (c) Microsoft Corporation

All rights reserved.

MIT License

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.


Faster R-CNN

The MIT License (MIT)

Copyright (c) 2015 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


MS COCO API

Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.

--------------------------------------------------------------------------------
/demo/ILSVRC2015_val_00007010/000000.JPEG ... 000143.JPEG:
--------------------------------------------------------------------------------
[144 binary demo frames, 000000.JPEG through 000143.JPEG. Each frame is hosted at
https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/<frame>.JPEG]
https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000137.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000138.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000138.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000139.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000139.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000140.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000140.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000141.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000141.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000142.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000142.JPEG -------------------------------------------------------------------------------- /demo/ILSVRC2015_val_00007010/000143.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/demo/ILSVRC2015_val_00007010/000143.JPEG -------------------------------------------------------------------------------- /dff_rfcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/dff_rfcn/__init__.py -------------------------------------------------------------------------------- /dff_rfcn/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os.path as osp 9 | import sys 10 | 11 | def add_path(path): 12 | if path not in sys.path: 13 | sys.path.insert(0, path) 14 | 15 | this_dir = osp.dirname(__file__) 16 | 17 | lib_path = osp.join(this_dir, '..', 'lib') 18 | add_path(lib_path) 19 | -------------------------------------------------------------------------------- /dff_rfcn/config/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/dff_rfcn/config/__init__.py -------------------------------------------------------------------------------- /dff_rfcn/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Xizhou Zhu, Yuwen Xiong, Bin Xiao 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import yaml 15 | import numpy as np 16 | from easydict import EasyDict as edict 17 | 18 | config = edict() 19 | 20 | config.MXNET_VERSION = '' 21 | config.output_path = '' 22 | config.symbol = '' 23 | config.gpus = '' 24 | config.CLASS_AGNOSTIC = True 25 | config.SCALES = [(600, 1000)] # first is scale (the shorter side); second is max size 26 | 27 | # default training 28 | config.default = edict() 29 | config.default.frequent = 20 30 | config.default.kvstore = 'device' 31 | 32 | # network related params 33 | config.network = edict() 34 | config.network.pretrained = '' 35 | config.network.pretrained_flow = '' 36 | config.network.pretrained_epoch = 0 37 | config.network.PIXEL_MEANS = np.array([0, 0, 0]) 38 | config.network.IMAGE_STRIDE = 0 39 | config.network.RPN_FEAT_STRIDE = 16 40 | config.network.RCNN_FEAT_STRIDE = 16 41 | config.network.FIXED_PARAMS = ['gamma', 'beta'] 42 | config.network.ANCHOR_SCALES = (8, 16, 32) 43 | config.network.ANCHOR_RATIOS = (0.5, 1, 2) 44 | config.network.NORMALIZE_RPN = True 45 | config.network.ANCHOR_MEANS = (0.0, 0.0, 0.0, 0.0) 46 | config.network.ANCHOR_STDS = (0.1, 0.1, 0.4, 0.4) 47 | config.network.NUM_ANCHORS = len(config.network.ANCHOR_SCALES) * len(config.network.ANCHOR_RATIOS) 48 | config.network.DFF_FEAT_DIM = 1024 49 | 50 | # dataset related params 51 | config.dataset = edict() 52 | config.dataset.dataset = 'ImageNetVID' 53 | config.dataset.image_set = 'DET_train_30classes+VID_train_15frames' 54 | config.dataset.test_image_set = 'VID_val_videos' 55 | config.dataset.root_path = './data' 56 | config.dataset.dataset_path = './data/ILSVRC2015' 57 | config.dataset.NUM_CLASSES = 31 58 | 59 | 60 | config.TRAIN = edict() 61 | 62 | config.TRAIN.lr = 0 63 | config.TRAIN.lr_step = '' 64 | config.TRAIN.lr_factor = 0.1 65 | config.TRAIN.warmup = False 66 | config.TRAIN.warmup_lr = 0 67 | config.TRAIN.warmup_step = 0 68 | config.TRAIN.momentum = 0.9 69 | config.TRAIN.wd = 0.0005 70 | config.TRAIN.begin_epoch = 0 71 | config.TRAIN.end_epoch = 0 72 | config.TRAIN.model_prefix = '' 73 | 74 | # whether resume training 75 | config.TRAIN.RESUME = False 76 | # whether flip image 77 | config.TRAIN.FLIP = True 78 | # whether shuffle image 79 | config.TRAIN.SHUFFLE = True 80 | # whether use OHEM 81 | config.TRAIN.ENABLE_OHEM = False 82 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 83 | config.TRAIN.BATCH_IMAGES = 2 84 | # e2e changes behavior of anchor loader and metric 85 | config.TRAIN.END2END = False 86 | # group images with similar aspect ratio 87 | config.TRAIN.ASPECT_GROUPING = True 88 | 89 | # R-CNN 90 | # rcnn rois batch size 91 | config.TRAIN.BATCH_ROIS = 128 92 | config.TRAIN.BATCH_ROIS_OHEM = 128 93 | # 
rcnn rois sampling params 94 | config.TRAIN.FG_FRACTION = 0.25 95 | config.TRAIN.FG_THRESH = 0.5 96 | config.TRAIN.BG_THRESH_HI = 0.5 97 | config.TRAIN.BG_THRESH_LO = 0.0 98 | # rcnn bounding box regression params 99 | config.TRAIN.BBOX_REGRESSION_THRESH = 0.5 100 | config.TRAIN.BBOX_WEIGHTS = np.array([1.0, 1.0, 1.0, 1.0]) 101 | 102 | # RPN anchor loader 103 | # rpn anchors batch size 104 | config.TRAIN.RPN_BATCH_SIZE = 256 105 | # rpn anchors sampling params 106 | config.TRAIN.RPN_FG_FRACTION = 0.5 107 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 108 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 109 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 110 | # rpn bounding box regression params 111 | config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 112 | config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 113 | 114 | # used for end2end training 115 | # RPN proposal 116 | config.TRAIN.CXX_PROPOSAL = True 117 | config.TRAIN.RPN_NMS_THRESH = 0.7 118 | config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 119 | config.TRAIN.RPN_POST_NMS_TOP_N = 2000 120 | config.TRAIN.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 121 | # approximate bounding box regression 122 | config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True 123 | config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 124 | config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 125 | 126 | # DFF, trained image sampled from [min_offset, max_offset] 127 | config.TRAIN.MIN_OFFSET = -9 128 | config.TRAIN.MAX_OFFSET = 0 129 | 130 | config.TEST = edict() 131 | 132 | # R-CNN testing 133 | # use rpn to generate proposal 134 | config.TEST.HAS_RPN = False 135 | # size of images for each device 136 | config.TEST.BATCH_IMAGES = 1 137 | 138 | # RPN proposal 139 | config.TEST.CXX_PROPOSAL = True 140 | config.TEST.RPN_NMS_THRESH = 0.7 141 | config.TEST.RPN_PRE_NMS_TOP_N = 6000 142 | config.TEST.RPN_POST_NMS_TOP_N = 300 143 | config.TEST.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 144 | 145 | # RCNN nms 146 | config.TEST.NMS = 0.3 147 | 148 | # DFF 149 | config.TEST.KEY_FRAME_INTERVAL = 10 150 | 151 | config.TEST.max_per_image = 300 152 | 153 | # Test Model Epoch 154 | config.TEST.test_epoch = 0 155 | 156 | 157 | def update_config(config_file): 158 | exp_config = None 159 | with open(config_file) as f: 160 | exp_config = edict(yaml.load(f)) 161 | for k, v in exp_config.items(): 162 | if k in config: 163 | if isinstance(v, dict): 164 | if k == 'TRAIN': 165 | if 'BBOX_WEIGHTS' in v: 166 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 167 | elif k == 'network': 168 | if 'PIXEL_MEANS' in v: 169 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 170 | for vk, vv in v.items(): 171 | config[k][vk] = vv 172 | else: 173 | if k == 'SCALES': 174 | config[k][0] = (tuple(v)) 175 | else: 176 | config[k] = v 177 | else: 178 | raise ValueError("key must exist in config.py") 179 | -------------------------------------------------------------------------------- /dff_rfcn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/dff_rfcn/core/__init__.py -------------------------------------------------------------------------------- /dff_rfcn/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # 
-------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import time 15 | import logging 16 | import mxnet as mx 17 | 18 | 19 | class Speedometer(object): 20 | def __init__(self, batch_size, frequent=50): 21 | self.batch_size = batch_size 22 | self.frequent = frequent 23 | self.init = False 24 | self.tic = 0 25 | self.last_count = 0 26 | 27 | def __call__(self, param): 28 | """Callback to Show speed.""" 29 | count = param.nbatch 30 | if self.last_count > count: 31 | self.init = False 32 | self.last_count = count 33 | 34 | if self.init: 35 | if count % self.frequent == 0: 36 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 37 | s = '' 38 | if param.eval_metric is not None: 39 | name, value = param.eval_metric.get() 40 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 41 | for n, v in zip(name, value): 42 | s += "%s=%f,\t" % (n, v) 43 | else: 44 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 45 | 46 | logging.info(s) 47 | print(s) 48 | self.tic = time.time() 49 | else: 50 | self.init = True 51 | self.tic = time.time() 52 | 53 | 54 | def do_checkpoint(prefix, means, stds): 55 | def _callback(iter_no, sym, arg, aux): 56 | weight = arg['rfcn_bbox_weight'] 57 | bias = arg['rfcn_bbox_bias'] 58 | repeat = bias.shape[0] / means.shape[0] 59 | 60 | arg['rfcn_bbox_weight_test'] = weight * mx.nd.repeat(mx.nd.array(stds), repeats=repeat).reshape((bias.shape[0], 1, 1, 1)) 61 | arg['rfcn_bbox_bias_test'] = arg['rfcn_bbox_bias'] * mx.nd.repeat(mx.nd.array(stds), repeats=repeat) + mx.nd.repeat(mx.nd.array(means), repeats=repeat) 62 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 63 | arg.pop('rfcn_bbox_weight_test') 64 | arg.pop('rfcn_bbox_bias_test') 65 | return _callback -------------------------------------------------------------------------------- /dff_rfcn/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/dff_rfcn/function/__init__.py -------------------------------------------------------------------------------- /dff_rfcn/function/test_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import pprint 16 | import logging 17 | import time 18 | import os 19 | import numpy as np 20 | import mxnet as mx 21 | 22 | from symbols import * 23 | from dataset import * 24 | from core.loader import TestLoader 25 | from core.tester import Predictor, pred_eval, pred_eval_multiprocess 26 | from utils.load_model import load_param 27 | 28 | def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx): 29 | # infer shape 30 | data_shape_dict = 
dict(test_data.provide_data_single) 31 | sym_instance.infer_shape(data_shape_dict) 32 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 33 | 34 | # decide maximum shape 35 | data_names = [k[0] for k in test_data.provide_data_single] 36 | label_names = None 37 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))), 38 | ('data_key', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),]] 39 | 40 | # create predictor 41 | predictor = Predictor(sym, data_names, label_names, 42 | context=ctx, max_data_shapes=max_data_shape, 43 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 44 | arg_params=arg_params, aux_params=aux_params) 45 | return predictor 46 | 47 | def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, 48 | ctx, prefix, epoch, 49 | vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None): 50 | if not logger: 51 | assert False, 'require a logger' 52 | 53 | # print cfg 54 | pprint.pprint(cfg) 55 | logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) 56 | 57 | # load symbol and testing data 58 | key_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 59 | cur_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 60 | key_sym = key_sym_instance.get_key_test_symbol(cfg) 61 | cur_sym = cur_sym_instance.get_cur_test_symbol(cfg) 62 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 63 | roidb = imdb.gt_roidb() 64 | 65 | # get test data iter 66 | # split roidbs 67 | gpu_num = len(ctx) 68 | roidbs = [[] for x in range(gpu_num)] 69 | roidbs_seg_lens = np.zeros(gpu_num, dtype=np.int) 70 | for x in roidb: 71 | gpu_id = np.argmin(roidbs_seg_lens) 72 | roidbs[gpu_id].append(x) 73 | roidbs_seg_lens[gpu_id] += x['frame_seg_len'] 74 | 75 | # get test data iter 76 | test_datas = [TestLoader(x, cfg, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) for x in roidbs] 77 | 78 | # load model 79 | arg_params, aux_params = load_param(prefix, epoch, process=True) 80 | 81 | # create predictor 82 | key_predictors = [get_predictor(key_sym, key_sym_instance, cfg, arg_params, aux_params, test_datas[i], [ctx[i]]) for i in range(gpu_num)] 83 | cur_predictors = [get_predictor(cur_sym, cur_sym_instance, cfg, arg_params, aux_params, test_datas[i], [ctx[i]]) for i in range(gpu_num)] 84 | 85 | # start detection 86 | #pred_eval(0, key_predictors[0], cur_predictors[0], test_datas[0], imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) 87 | pred_eval_multiprocess(gpu_num, key_predictors, cur_predictors, test_datas, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) 88 | -------------------------------------------------------------------------------- /dff_rfcn/function/test_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import pprint 16 | import logging 17 | import mxnet as mx 18 | 19 | from symbols 
import * 20 | from dataset import * 21 | from core.loader import TestLoader 22 | from core.tester import Predictor, generate_proposals 23 | from utils.load_model import load_param 24 | 25 | 26 | def test_rpn(cfg, dataset, image_set, root_path, dataset_path, 27 | ctx, prefix, epoch, 28 | vis, shuffle, thresh, logger=None, output_path=None): 29 | # set up logger 30 | if not logger: 31 | logging.basicConfig() 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | 35 | # rpn generate proposal cfg 36 | cfg.TEST.HAS_RPN = True 37 | 38 | # print cfg 39 | pprint.pprint(cfg) 40 | logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg))) 41 | 42 | # load symbol 43 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 44 | sym = sym_instance.get_symbol_rpn(cfg, is_train=False) 45 | 46 | # load dataset and prepare imdb for training 47 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 48 | roidb = imdb.gt_roidb() 49 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True) 50 | 51 | # load model 52 | arg_params, aux_params = load_param(prefix, epoch) 53 | 54 | # infer shape 55 | data_shape_dict = dict(test_data.provide_data_single) 56 | sym_instance.infer_shape(data_shape_dict) 57 | 58 | # check parameters 59 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 60 | 61 | # decide maximum shape 62 | data_names = [k[0] for k in test_data.provide_data[0]] 63 | label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]] 64 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start testing 73 | imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh) 74 | 75 | all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 76 | logger.info(all_log_info) 77 | -------------------------------------------------------------------------------- /dff_rfcn/operator_cxx/multi_proposal.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 |  * Copyright (c) 2017 Microsoft
3 |  * Licensed under The MIT License [see LICENSE for details]
4 |  * \file multi_proposal.cc
5 |  * \brief
6 |  * \author Xizhou Zhu
7 | */
8 | 
9 | #include "./multi_proposal-inl.h"
10 | 
11 | 
12 | namespace mxnet {
13 | namespace op {
14 | 
15 | template<typename xpu>
16 | class MultiProposalOp : public Operator{
17 |  public:
18 |   explicit MultiProposalOp(MultiProposalParam param) {
19 |     this->param_ = param;
20 |   }
21 | 
22 |   virtual void Forward(const OpContext &ctx,
23 |                        const std::vector<TBlob> &in_data,
24 |                        const std::vector<OpReqType> &req,
25 |                        const std::vector<TBlob> &out_data,
26 |                        const std::vector<TBlob> &aux_states) {
27 |     LOG(FATAL) << "not implemented";
28 |   }
29 | 
30 |   virtual void Backward(const OpContext &ctx,
31 |                         const std::vector<TBlob> &out_grad,
32 |                         const std::vector<TBlob> &in_data,
33 |                         const std::vector<TBlob> &out_data,
34 |                         const std::vector<OpReqType> &req,
35 |                         const std::vector<TBlob> &in_grad,
36 |                         const std::vector<TBlob> &aux_states) {
37 |     LOG(FATAL) << "not implemented";
38 |   }
39 | 
40 |  private:
41 |   MultiProposalParam param_;
42 | };  // class MultiProposalOp
43 | 
44 | template<>
45 | Operator *CreateOp<cpu>(MultiProposalParam param) {
46 |   return new MultiProposalOp<cpu>(param);
47 | }
48 | 
49 | Operator* MultiProposalProp::CreateOperator(Context ctx) const {
50 |   DO_BIND_DISPATCH(CreateOp, param_);
51 | }
52 | 
53 | DMLC_REGISTER_PARAMETER(MultiProposalParam);
54 | 
55 | MXNET_REGISTER_OP_PROPERTY(_contrib_MultiProposal, MultiProposalProp)
56 | .describe("Generate region proposals via RPN")
57 | .add_argument("cls_score", "NDArray-or-Symbol", "Score of how likely proposal is object.")
58 | .add_argument("bbox_pred", "NDArray-or-Symbol", "BBox Predicted deltas from anchors for proposals")
59 | .add_argument("im_info", "NDArray-or-Symbol", "Image size and scale.")
60 | .add_arguments(MultiProposalParam::__FIELDS__());
61 | 
62 | }  // namespace op
63 | }  // namespace mxnet
64 | 
--------------------------------------------------------------------------------
/dff_rfcn/operator_cxx/psroi_pooling.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2017 by Contributors
3 |  * Copyright (c) 2017 Microsoft
4 |  * Licensed under The MIT License [see LICENSE for details]
5 |  * \file psroi_pooling.cc
6 |  * \brief psroi pooling operator
7 |  * \author Yi Li, Tairui Chen, Guodong Zhang, Jifeng Dai
8 | */
9 | #include "./psroi_pooling-inl.h"
10 | #include <mshadow/base.h>
11 | #include <mshadow/tensor.h>
12 | #include <mshadow/packet-inl.h>
13 | #include <mshadow/dot_engine-inl.h>
14 | #include <cassert>
15 | 
16 | using std::max;
17 | using std::min;
18 | using std::floor;
19 | using std::ceil;
20 | 
21 | namespace mshadow {
22 | template<typename Dtype>
23 | inline void PSROIPoolForward(const Tensor<cpu, 4, Dtype> &out,
24 |                             const Tensor<cpu, 4, Dtype> &data,
25 |                             const Tensor<cpu, 2, Dtype> &bbox,
26 |                             const Tensor<cpu, 4, Dtype> &mapping_channel,
27 |                             const float spatial_scale_,
28 |                             const int output_dim_,
29 |                             const int group_size_) {
30 |   // NOT_IMPLEMENTED;
31 |   return;
32 | }
33 | 
34 | template<typename Dtype>
35 | inline void PSROIPoolBackwardAcc(const Tensor<cpu, 4, Dtype> &in_grad,
36 |                                 const Tensor<cpu, 4, Dtype> &out_grad,
37 |                                 const Tensor<cpu, 2, Dtype> &bbox,
38 |                                 const Tensor<cpu, 4, Dtype> &mapping_channel,
39 |                                 const float spatial_scale_,
40 |                                 const int output_dim_) {
41 |   // NOT_IMPLEMENTED;
42 |   return;
43 | }
44 | }  // namespace mshadow
45 | 
46 | namespace mxnet {
47 | namespace op {
48 | 
49 | template<>
50 | Operator *CreateOp<cpu>(PSROIPoolingParam param, int dtype) {
51 |   Operator* op = NULL;
52 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
53 |     op = new PSROIPoolingOp<cpu, DType>(param);
54 |   });
55 |   return op;
56 | }
57 | 
58 | Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
59 |                                              std::vector<int> *in_type) const {
60 |   std::vector<TShape> out_shape, aux_shape;
61 |   std::vector<int> out_type, aux_type;
62 |   CHECK(InferType(in_type, &out_type, &aux_type));
63 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
64 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
65 | }
66 | 
67 | DMLC_REGISTER_PARAMETER(PSROIPoolingParam);
68 | 
69 | MXNET_REGISTER_OP_PROPERTY(_contrib_PSROIPooling, PSROIPoolingProp)
70 | .describe("Performs region-of-interest pooling on inputs. Resize bounding box coordinates by "
71 | "spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled "
72 | "by max pooling to a fixed size output indicated by pooled_size. batch_size will change to "
73 | "the number of region bounding boxes after PSROIPooling")
74 | .add_argument("data", "Symbol", "Input data to the pooling operator, a 4D Feature maps")
75 | .add_argument("rois", "Symbol", "Bounding box coordinates, a 2D array of "
76 | "[[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are top left and down right corners "
77 | "of designated region of interest. 
batch_index indicates the index of corresponding image " 78 | "in the input data") 79 | .add_arguments(PSROIPoolingParam::__FIELDS__()); 80 | } // namespace op 81 | } // namespace mxnet -------------------------------------------------------------------------------- /dff_rfcn/operator_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/dff_rfcn/operator_py/__init__.py -------------------------------------------------------------------------------- /dff_rfcn/operator_py/box_annotator_ohem.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | """ 9 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 10 | """ 11 | 12 | import mxnet as mx 13 | import numpy as np 14 | from distutils.util import strtobool 15 | 16 | 17 | 18 | 19 | class BoxAnnotatorOHEMOperator(mx.operator.CustomOp): 20 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 21 | super(BoxAnnotatorOHEMOperator, self).__init__() 22 | self._num_classes = num_classes 23 | self._num_reg_classes = num_reg_classes 24 | self._roi_per_img = roi_per_img 25 | 26 | def forward(self, is_train, req, in_data, out_data, aux): 27 | 28 | cls_score = in_data[0] 29 | bbox_pred = in_data[1] 30 | labels = in_data[2].asnumpy() 31 | bbox_targets = in_data[3] 32 | bbox_weights = in_data[4] 33 | 34 | per_roi_loss_cls = mx.nd.SoftmaxActivation(cls_score) + 1e-14 35 | per_roi_loss_cls = per_roi_loss_cls.asnumpy() 36 | per_roi_loss_cls = per_roi_loss_cls[np.arange(per_roi_loss_cls.shape[0], dtype='int'), labels.astype('int')] 37 | per_roi_loss_cls = -1 * np.log(per_roi_loss_cls) 38 | per_roi_loss_cls = np.reshape(per_roi_loss_cls, newshape=(-1,)) 39 | 40 | per_roi_loss_bbox = bbox_weights * mx.nd.smooth_l1((bbox_pred - bbox_targets), scalar=1.0) 41 | per_roi_loss_bbox = mx.nd.sum(per_roi_loss_bbox, axis=1).asnumpy() 42 | 43 | top_k_per_roi_loss = np.argsort(per_roi_loss_cls + per_roi_loss_bbox) 44 | labels_ohem = labels 45 | labels_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = -1 46 | bbox_weights_ohem = bbox_weights.asnumpy() 47 | bbox_weights_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = 0 48 | 49 | labels_ohem = mx.nd.array(labels_ohem) 50 | bbox_weights_ohem = mx.nd.array(bbox_weights_ohem) 51 | 52 | for ind, val in enumerate([labels_ohem, bbox_weights_ohem]): 53 | self.assign(out_data[ind], req[ind], val) 54 | 55 | 56 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 57 | for i in range(len(in_grad)): 58 | self.assign(in_grad[i], req[i], 0) 59 | 60 | 61 | @mx.operator.register('BoxAnnotatorOHEM') 62 | class BoxAnnotatorOHEMProp(mx.operator.CustomOpProp): 63 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 64 | super(BoxAnnotatorOHEMProp, self).__init__(need_top_grad=False) 65 | self._num_classes = int(num_classes) 66 | self._num_reg_classes = int(num_reg_classes) 67 | self._roi_per_img = int(roi_per_img) 68 | 69 | def list_arguments(self): 70 | return ['cls_score', 'bbox_pred', 'labels', 'bbox_targets', 'bbox_weights'] 71 | 72 | def list_outputs(self): 73 | return ['labels_ohem', 
'bbox_weights_ohem'] 74 | 75 | def infer_shape(self, in_shape): 76 | labels_shape = in_shape[2] 77 | bbox_weights_shape = in_shape[4] 78 | 79 | return in_shape, \ 80 | [labels_shape, bbox_weights_shape] 81 | 82 | def create_operator(self, ctx, shapes, dtypes): 83 | return BoxAnnotatorOHEMOperator(self._num_classes, self._num_reg_classes, self._roi_per_img) 84 | 85 | def declare_backward_dependency(self, out_grad, in_data, out_data): 86 | return [] 87 | -------------------------------------------------------------------------------- /dff_rfcn/operator_py/proposal_target.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | """ 15 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 16 | """ 17 | 18 | import mxnet as mx 19 | import numpy as np 20 | from distutils.util import strtobool 21 | from easydict import EasyDict as edict 22 | import cPickle 23 | 24 | 25 | from core.rcnn import sample_rois 26 | 27 | DEBUG = False 28 | 29 | 30 | class ProposalTargetOperator(mx.operator.CustomOp): 31 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction): 32 | super(ProposalTargetOperator, self).__init__() 33 | self._num_classes = num_classes 34 | self._batch_images = batch_images 35 | self._batch_rois = batch_rois 36 | self._cfg = cfg 37 | self._fg_fraction = fg_fraction 38 | 39 | if DEBUG: 40 | self._count = 0 41 | self._fg_num = 0 42 | self._bg_num = 0 43 | 44 | def forward(self, is_train, req, in_data, out_data, aux): 45 | assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \ 46 | 'batchimages {} must devide batch_rois {}'.format(self._batch_images, self._batch_rois) 47 | all_rois = in_data[0].asnumpy() 48 | gt_boxes = in_data[1].asnumpy() 49 | 50 | if self._batch_rois == -1: 51 | rois_per_image = all_rois.shape[0] + gt_boxes.shape[0] 52 | fg_rois_per_image = rois_per_image 53 | else: 54 | rois_per_image = self._batch_rois / self._batch_images 55 | fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int) 56 | 57 | 58 | # Include ground-truth boxes in the set of candidate rois 59 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 60 | all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1])))) 61 | # Sanity check: single batch only 62 | assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported' 63 | 64 | rois, labels, bbox_targets, bbox_weights = \ 65 | sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes) 66 | 67 | if DEBUG: 68 | print "labels=", labels 69 | print 'num fg: {}'.format((labels > 0).sum()) 70 | print 'num bg: {}'.format((labels == 0).sum()) 71 | self._count += 1 72 | self._fg_num += (labels > 0).sum() 73 | self._bg_num += (labels == 0).sum() 74 | print "self._count=", self._count 75 | print 'num fg avg: {}'.format(self._fg_num / self._count) 76 | print 'num bg avg: {}'.format(self._bg_num / self._count) 77 | print 'ratio: 
{:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 78 | 79 | for ind, val in enumerate([rois, labels, bbox_targets, bbox_weights]): 80 | self.assign(out_data[ind], req[ind], val) 81 | 82 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 83 | self.assign(in_grad[0], req[0], 0) 84 | self.assign(in_grad[1], req[1], 0) 85 | 86 | 87 | @mx.operator.register('proposal_target') 88 | class ProposalTargetProp(mx.operator.CustomOpProp): 89 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction='0.25'): 90 | super(ProposalTargetProp, self).__init__(need_top_grad=False) 91 | self._num_classes = int(num_classes) 92 | self._batch_images = int(batch_images) 93 | self._batch_rois = int(batch_rois) 94 | self._cfg = cPickle.loads(cfg) 95 | self._fg_fraction = float(fg_fraction) 96 | 97 | def list_arguments(self): 98 | return ['rois', 'gt_boxes'] 99 | 100 | def list_outputs(self): 101 | return ['rois_output', 'label', 'bbox_target', 'bbox_weight'] 102 | 103 | def infer_shape(self, in_shape): 104 | rpn_rois_shape = in_shape[0] 105 | gt_boxes_shape = in_shape[1] 106 | 107 | rois = rpn_rois_shape[0] + gt_boxes_shape[0] if self._batch_rois == -1 else self._batch_rois 108 | 109 | output_rois_shape = (rois, 5) 110 | label_shape = (rois, ) 111 | bbox_target_shape = (rois, self._num_classes * 4) 112 | bbox_weight_shape = (rois, self._num_classes * 4) 113 | 114 | return [rpn_rois_shape, gt_boxes_shape], \ 115 | [output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape] 116 | 117 | def create_operator(self, ctx, shapes, dtypes): 118 | return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._cfg, self._fg_fraction) 119 | 120 | def declare_backward_dependency(self, out_grad, in_data, out_data): 121 | return [] 122 | -------------------------------------------------------------------------------- /dff_rfcn/operator_py/rpn_inv_normalize.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | import numpy as np 10 | from distutils.util import strtobool 11 | 12 | class RPNInvNormalizeOperator(mx.operator.CustomOp): 13 | def __init__(self, num_anchors, bbox_mean, bbox_std): 14 | super(RPNInvNormalizeOperator, self).__init__() 15 | self._num_anchors = num_anchors 16 | self._bbox_mean = mx.ndarray.Reshape(mx.nd.array(bbox_mean), shape=(1,4,1,1)) 17 | self._bbox_std = mx.ndarray.Reshape(mx.nd.array(bbox_std), shape=(1,4,1,1)) 18 | 19 | def forward(self, is_train, req, in_data, out_data, aux): 20 | bbox_pred = in_data[0] 21 | tile_shape = (bbox_pred.shape[0], self._num_anchors, bbox_pred.shape[2], bbox_pred.shape[3]) 22 | bbox_mean = mx.ndarray.tile(self._bbox_mean.as_in_context(bbox_pred.context), reps=tile_shape) 23 | bbox_std = mx.ndarray.tile(self._bbox_std.as_in_context(bbox_pred.context), reps=tile_shape) 24 | bbox_pred = bbox_pred * bbox_std + bbox_mean 25 | 26 | self.assign(out_data[0], req[0], bbox_pred) 27 | 28 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 29 | self.assign(in_grad[0], req[0], 0) 30 | 31 | @mx.operator.register('rpn_inv_normalize') 32 | class RPNInvNormalizeProp(mx.operator.CustomOpProp): 33 | def __init__(self, num_anchors, bbox_mean='(0.0, 0.0, 0.0, 
0.0)', bbox_std='(0.1, 0.1, 0.2, 0.2)'):
34 |         super(RPNInvNormalizeProp, self).__init__(need_top_grad=False)
35 |         self._num_anchors = int(num_anchors)
36 |         self._bbox_mean = np.fromstring(bbox_mean[1:-1], dtype=float, sep=',')
37 |         self._bbox_std = np.fromstring(bbox_std[1:-1], dtype=float, sep=',')
38 | 
39 |     def list_arguments(self):
40 |         return ['bbox_pred']
41 | 
42 |     def list_outputs(self):
43 |         return ['out_bbox_pred']
44 | 
45 |     def infer_shape(self, in_shape):
46 | 
47 |         return [in_shape[0]], \
48 |                [in_shape[0]]
49 | 
50 |     def create_operator(self, ctx, shapes, dtypes):
51 |         return RPNInvNormalizeOperator(self._num_anchors, self._bbox_mean, self._bbox_std)
52 | 
53 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
54 |         return []
55 | 
--------------------------------------------------------------------------------
/dff_rfcn/operator_py/tile_as.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Deep Feature Flow
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Xizhou Zhu
6 | # --------------------------------------------------------
7 | 
8 | import mxnet as mx
9 | import numpy as np
10 | from distutils.util import strtobool
11 | 
12 | class TileAsOperator(mx.operator.CustomOp):
13 |     def __init__(self):
14 |         super(TileAsOperator, self).__init__()
15 | 
16 |     def forward(self, is_train, req, in_data, out_data, aux):
17 |         data_content = in_data[0]
18 |         data_tiled = mx.ndarray.tile(data_content, reps=(in_data[1].shape[0], 1, 1, 1))
19 |         self.assign(out_data[0], req[0], data_tiled)
20 | 
21 |     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
22 |         self.assign(in_grad[0], req[0], 0)
23 |         self.assign(in_grad[1], req[1], 0)
24 | 
25 | 
26 | @mx.operator.register('tile_as')
27 | class TileAsProp(mx.operator.CustomOpProp):
28 |     def __init__(self):
29 |         super(TileAsProp, self).__init__(need_top_grad=False)
30 | 
31 |     def list_arguments(self):
32 |         return ['data_content', 'data_shape']
33 | 
34 |     def list_outputs(self):
35 |         return ['data_tiled']
36 | 
37 |     def infer_shape(self, in_shape):
38 |         data_content_shape = in_shape[0]
39 |         data_shape_shape = in_shape[1]
40 | 
41 |         tiled_data_shape = (data_shape_shape[0], data_content_shape[1], data_content_shape[2], data_content_shape[3])
42 | 
43 |         return [data_content_shape, data_shape_shape], \
44 |                [tiled_data_shape]
45 | 
46 |     def create_operator(self, ctx, shapes, dtypes):
47 |         return TileAsOperator()
48 | 
49 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
50 |         return out_grad
51 | 
--------------------------------------------------------------------------------
/dff_rfcn/symbols/__init__.py:
--------------------------------------------------------------------------------
1 | import resnet_v1_101_flownet_rfcn
--------------------------------------------------------------------------------
/dff_rfcn/test.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Deep Feature Flow
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified by Yuwen Xiong
6 | # --------------------------------------------------------
7 | # Based on:
8 | # MX-RCNN
9 | # Copyright (c) 2016 by Contributors
10 | # Licence under The Apache 2.0 License
11 | # https://github.com/ijkguo/mx-rcnn/
12 | # 
-------------------------------------------------------- 13 | 14 | import _init_paths 15 | 16 | import cv2 17 | import argparse 18 | import os 19 | import sys 20 | import time 21 | import logging 22 | from config.config import config, update_config 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='Test a R-FCN network') 26 | # general 27 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 28 | 29 | args, rest = parser.parse_known_args() 30 | update_config(args.cfg) 31 | 32 | # rcnn 33 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 34 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 35 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-4, type=float) 36 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 37 | args = parser.parse_args() 38 | return args 39 | 40 | args = parse_args() 41 | curr_path = os.path.abspath(os.path.dirname(__file__)) 42 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 43 | 44 | import mxnet as mx 45 | from function.test_rcnn import test_rcnn 46 | from utils.create_logger import create_logger 47 | 48 | 49 | def main(): 50 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 51 | print args 52 | 53 | logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set) 54 | 55 | test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path, 56 | ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch, 57 | args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /experiments/dff_rfcn/cfgs/dff_rfcn_vid_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/dff_rfcn/imagenet_vid" 4 | gpus: '0' 5 | CLASS_AGNOSTIC: true 6 | SCALES: 7 | - 600 8 | - 1000 9 | default: 10 | frequent: 100 11 | kvstore: device 12 | network: 13 | PIXEL_MEANS: 14 | - 103.06 15 | - 115.90 16 | - 123.15 17 | IMAGE_STRIDE: 0 18 | RCNN_FEAT_STRIDE: 16 19 | RPN_FEAT_STRIDE: 16 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | ANCHOR_RATIOS: 28 | - 0.5 29 | - 1 30 | - 2 31 | ANCHOR_SCALES: 32 | - 8 33 | - 16 34 | - 32 35 | ANCHOR_MEANS: 36 | - 0.0 37 | - 0.0 38 | - 0.0 39 | - 0.0 40 | ANCHOR_STDS: 41 | - 0.1 42 | - 0.1 43 | - 0.4 44 | - 0.4 45 | NORMALIZE_RPN: TRUE 46 | NUM_ANCHORS: 9 47 | dataset: 48 | NUM_CLASSES: 31 49 | dataset: ImageNetVID 50 | dataset_path: "./data/ILSVRC2015" 51 | image_set: DET_train_30classes+VID_train_15frames 52 | root_path: "./data" 53 | test_image_set: VID_val_videos 54 | proposal: rpn 55 | TRAIN: 56 | lr: 0.00025 57 | lr_step: '1.333' 58 | warmup: false 59 | begin_epoch: 0 60 | end_epoch: 2 61 | model_prefix: 'dff_rfcn_vid' 62 | # whether resume training 63 | RESUME: false 64 | # whether flip image 65 | FLIP: true 66 | # whether shuffle image 67 | SHUFFLE: true 68 | # whether use OHEM 69 | ENABLE_OHEM: true 70 | # size of images for each 
device, 1 for e2e 71 | BATCH_IMAGES: 1 72 | # e2e changes behavior of anchor loader and metric 73 | END2END: true 74 | # group images with similar aspect ratio 75 | ASPECT_GROUPING: true 76 | # R-CNN 77 | # rcnn rois batch size 78 | BATCH_ROIS: -1 79 | BATCH_ROIS_OHEM: 128 80 | # rcnn rois sampling params 81 | FG_FRACTION: 0.25 82 | FG_THRESH: 0.5 83 | BG_THRESH_HI: 0.5 84 | BG_THRESH_LO: 0.0 85 | # rcnn bounding box regression params 86 | BBOX_REGRESSION_THRESH: 0.5 87 | BBOX_WEIGHTS: 88 | - 1.0 89 | - 1.0 90 | - 1.0 91 | - 1.0 92 | 93 | # RPN anchor loader 94 | # rpn anchors batch size 95 | RPN_BATCH_SIZE: 256 96 | # rpn anchors sampling params 97 | RPN_FG_FRACTION: 0.5 98 | RPN_POSITIVE_OVERLAP: 0.7 99 | RPN_NEGATIVE_OVERLAP: 0.3 100 | RPN_CLOBBER_POSITIVES: false 101 | # rpn bounding box regression params 102 | RPN_BBOX_WEIGHTS: 103 | - 1.0 104 | - 1.0 105 | - 1.0 106 | - 1.0 107 | RPN_POSITIVE_WEIGHT: -1.0 108 | # used for end2end training 109 | # RPN proposal 110 | CXX_PROPOSAL: true 111 | RPN_NMS_THRESH: 0.7 112 | RPN_PRE_NMS_TOP_N: 6000 113 | RPN_POST_NMS_TOP_N: 300 114 | RPN_MIN_SIZE: 0 115 | # approximate bounding box regression 116 | BBOX_NORMALIZATION_PRECOMPUTED: true 117 | BBOX_MEANS: 118 | - 0.0 119 | - 0.0 120 | - 0.0 121 | - 0.0 122 | BBOX_STDS: 123 | - 0.1 124 | - 0.1 125 | - 0.2 126 | - 0.2 127 | TEST: 128 | # use rpn to generate proposal 129 | HAS_RPN: true 130 | # size of images for each device 131 | BATCH_IMAGES: 1 132 | # RPN proposal 133 | CXX_PROPOSAL: true 134 | RPN_NMS_THRESH: 0.7 135 | RPN_PRE_NMS_TOP_N: 6000 136 | RPN_POST_NMS_TOP_N: 300 137 | RPN_MIN_SIZE: 0 138 | # RCNN nms 139 | NMS: 0.3 140 | test_epoch: 2 141 | -------------------------------------------------------------------------------- /experiments/dff_rfcn/cfgs/resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/dff_rfcn/imagenet_vid" 4 | symbol: resnet_v1_101_flownet_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_flow: "./model/pretrained_model/flownet" 16 | pretrained_epoch: 0 17 | PIXEL_MEANS: 18 | - 103.06 19 | - 115.90 20 | - 123.15 21 | IMAGE_STRIDE: 0 22 | RCNN_FEAT_STRIDE: 16 23 | RPN_FEAT_STRIDE: 16 24 | FIXED_PARAMS: 25 | - conv1 26 | - bn_conv1 27 | - res2 28 | - bn2 29 | - gamma 30 | - beta 31 | ANCHOR_RATIOS: 32 | - 0.5 33 | - 1 34 | - 2 35 | ANCHOR_SCALES: 36 | - 8 37 | - 16 38 | - 32 39 | ANCHOR_MEANS: 40 | - 0.0 41 | - 0.0 42 | - 0.0 43 | - 0.0 44 | ANCHOR_STDS: 45 | - 0.1 46 | - 0.1 47 | - 0.4 48 | - 0.4 49 | NORMALIZE_RPN: TRUE 50 | NUM_ANCHORS: 9 51 | dataset: 52 | NUM_CLASSES: 31 53 | dataset: ImageNetVID 54 | dataset_path: "./data/ILSVRC2015" 55 | image_set: DET_train_30classes+VID_train_15frames 56 | root_path: "./data" 57 | test_image_set: VID_val_videos 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.00025 61 | lr_step: '1.333' 62 | warmup: false 63 | begin_epoch: 0 64 | end_epoch: 2 65 | model_prefix: 'dff_rfcn_vid' 66 | # whether resume training 67 | RESUME: false 68 | # whether flip image 69 | FLIP: true 70 | # whether shuffle image 71 | SHUFFLE: true 72 | # whether use OHEM 73 | ENABLE_OHEM: true 74 | # size of images for each device, 1 for e2e 75 | BATCH_IMAGES: 1 76 | # e2e changes behavior of anchor loader and metric 77 | END2END: true 78 | # group 
images with similar aspect ratio 79 | ASPECT_GROUPING: true 80 | # R-CNN 81 | # rcnn rois batch size 82 | BATCH_ROIS: -1 83 | BATCH_ROIS_OHEM: 128 84 | # rcnn rois sampling params 85 | FG_FRACTION: 0.25 86 | FG_THRESH: 0.5 87 | BG_THRESH_HI: 0.5 88 | BG_THRESH_LO: 0.0 89 | # rcnn bounding box regression params 90 | BBOX_REGRESSION_THRESH: 0.5 91 | BBOX_WEIGHTS: 92 | - 1.0 93 | - 1.0 94 | - 1.0 95 | - 1.0 96 | 97 | # RPN anchor loader 98 | # rpn anchors batch size 99 | RPN_BATCH_SIZE: 256 100 | # rpn anchors sampling params 101 | RPN_FG_FRACTION: 0.5 102 | RPN_POSITIVE_OVERLAP: 0.7 103 | RPN_NEGATIVE_OVERLAP: 0.3 104 | RPN_CLOBBER_POSITIVES: false 105 | # rpn bounding box regression params 106 | RPN_BBOX_WEIGHTS: 107 | - 1.0 108 | - 1.0 109 | - 1.0 110 | - 1.0 111 | RPN_POSITIVE_WEIGHT: -1.0 112 | # used for end2end training 113 | # RPN proposal 114 | CXX_PROPOSAL: true 115 | RPN_NMS_THRESH: 0.7 116 | RPN_PRE_NMS_TOP_N: 6000 117 | RPN_POST_NMS_TOP_N: 300 118 | RPN_MIN_SIZE: 0 119 | # approximate bounding box regression 120 | BBOX_NORMALIZATION_PRECOMPUTED: true 121 | BBOX_MEANS: 122 | - 0.0 123 | - 0.0 124 | - 0.0 125 | - 0.0 126 | BBOX_STDS: 127 | - 0.1 128 | - 0.1 129 | - 0.2 130 | - 0.2 131 | TEST: 132 | # use rpn to generate proposal 133 | HAS_RPN: true 134 | # size of images for each device 135 | BATCH_IMAGES: 1 136 | # RPN proposal 137 | CXX_PROPOSAL: true 138 | RPN_NMS_THRESH: 0.7 139 | RPN_PRE_NMS_TOP_N: 6000 140 | RPN_POST_NMS_TOP_N: 300 141 | RPN_MIN_SIZE: 0 142 | # RCNN nms 143 | NMS: 0.3 144 | test_epoch: 2 145 | -------------------------------------------------------------------------------- /experiments/dff_rfcn/dff_rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'dff_rfcn')) 15 | 16 | import train_end2end 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_end2end.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/dff_rfcn/dff_rfcn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'dff_rfcn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_imagenet_vid_rfcn_end2end_ohem.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/imagenet_vid" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | ANCHOR_RATIOS: 31 | - 0.5 32 | - 1 33 | - 2 34 | ANCHOR_SCALES: 35 | - 8 36 | - 16 37 | - 32 38 | ANCHOR_MEANS: 39 | - 0.0 40 | - 0.0 41 | - 0.0 42 | - 0.0 43 | ANCHOR_STDS: 44 | - 0.1 45 | - 0.1 46 | - 0.4 47 | - 0.4 48 | NORMALIZE_RPN: TRUE 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 31 52 | dataset: ImageNetVID 53 | dataset_path: "./data/ILSVRC2015" 54 | image_set: DET_train_30classes+VID_train_15frames 55 | root_path: "./data" 56 | test_image_set: VID_val_frames 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.00025 60 | lr_step: '1.333' 61 | warmup: false 62 | begin_epoch: 0 63 | end_epoch: 2 64 | model_prefix: 'rfcn_vid' 65 | # whether resume training 66 | RESUME: false 67 | # whether flip image 68 | FLIP: true 69 | # whether shuffle image 70 | SHUFFLE: true 71 | # whether use OHEM 72 | ENABLE_OHEM: true 73 | # size of images for each device, 1 for e2e 74 | BATCH_IMAGES: 1 75 | # e2e changes behavior of anchor loader and metric 76 | END2END: true 77 | # group images with similar aspect ratio 78 | ASPECT_GROUPING: true 79 | # R-CNN 80 | # rcnn rois batch size 81 | BATCH_ROIS: -1 82 | BATCH_ROIS_OHEM: 128 83 | # rcnn rois sampling params 84 | FG_FRACTION: 0.25 85 | FG_THRESH: 0.5 86 | BG_THRESH_HI: 0.5 87 | BG_THRESH_LO: 0.0 88 | # rcnn bounding box regression params 89 | BBOX_REGRESSION_THRESH: 0.5 90 | BBOX_WEIGHTS: 91 | - 1.0 92 | - 1.0 93 | - 1.0 94 | - 1.0 95 | 96 | # RPN anchor loader 97 | # rpn anchors batch size 98 | RPN_BATCH_SIZE: 256 99 | # rpn anchors sampling params 100 | RPN_FG_FRACTION: 0.5 101 | RPN_POSITIVE_OVERLAP: 0.7 102 | RPN_NEGATIVE_OVERLAP: 0.3 103 | RPN_CLOBBER_POSITIVES: false 104 | # rpn bounding box regression params 105 | RPN_BBOX_WEIGHTS: 106 | - 1.0 107 | - 1.0 108 | - 1.0 109 | - 1.0 110 | RPN_POSITIVE_WEIGHT: -1.0 111 | # used for end2end training 112 | # RPN proposal 113 | CXX_PROPOSAL: true 114 | RPN_NMS_THRESH: 0.7 115 | RPN_PRE_NMS_TOP_N: 6000 116 | RPN_POST_NMS_TOP_N: 300 117 | RPN_MIN_SIZE: 0 118 | # approximate bounding box regression 119 | BBOX_NORMALIZATION_PRECOMPUTED: true 120 | BBOX_MEANS: 121 | - 0.0 122 | - 0.0 123 | - 0.0 124 | - 0.0 125 | BBOX_STDS: 126 | - 0.1 127 | - 0.1 128 | - 0.2 129 | - 0.2 130 | TEST: 131 | # use rpn to generate proposal 132 | HAS_RPN: true 133 | # size of images for each device 134 | BATCH_IMAGES: 1 135 | # RPN proposal 136 | CXX_PROPOSAL: true 137 | RPN_NMS_THRESH: 0.7 138 | RPN_PRE_NMS_TOP_N: 6000 139 | RPN_POST_NMS_TOP_N: 300 140 | RPN_MIN_SIZE: 0 141 | # RCNN nms 142 | NMS: 0.3 143 | test_epoch: 2 144 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/rfcn_vid_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/imagenet_vid" 4 | gpus: '0' 5 | CLASS_AGNOSTIC: true 6 | SCALES: 7 | - 600 8 | - 1000 9 | 
default: 10 | frequent: 100 11 | kvstore: device 12 | network: 13 | PIXEL_MEANS: 14 | - 103.06 15 | - 115.90 16 | - 123.15 17 | IMAGE_STRIDE: 0 18 | RCNN_FEAT_STRIDE: 16 19 | RPN_FEAT_STRIDE: 16 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | ANCHOR_RATIOS: 28 | - 0.5 29 | - 1 30 | - 2 31 | ANCHOR_SCALES: 32 | - 8 33 | - 16 34 | - 32 35 | ANCHOR_MEANS: 36 | - 0.0 37 | - 0.0 38 | - 0.0 39 | - 0.0 40 | ANCHOR_STDS: 41 | - 0.1 42 | - 0.1 43 | - 0.4 44 | - 0.4 45 | NORMALIZE_RPN: TRUE 46 | NUM_ANCHORS: 9 47 | dataset: 48 | NUM_CLASSES: 31 49 | dataset: ImageNetVID 50 | dataset_path: "./data/ILSVRC2015" 51 | image_set: DET_train_30classes+VID_train_15frames 52 | root_path: "./data" 53 | test_image_set: VID_val_frames 54 | proposal: rpn 55 | TRAIN: 56 | lr: 0.00025 57 | lr_step: '1.333' 58 | warmup: false 59 | begin_epoch: 0 60 | end_epoch: 2 61 | model_prefix: 'rfcn_vid' 62 | # whether resume training 63 | RESUME: false 64 | # whether flip image 65 | FLIP: true 66 | # whether shuffle image 67 | SHUFFLE: true 68 | # whether use OHEM 69 | ENABLE_OHEM: true 70 | # size of images for each device, 1 for e2e 71 | BATCH_IMAGES: 1 72 | # e2e changes behavior of anchor loader and metric 73 | END2END: true 74 | # group images with similar aspect ratio 75 | ASPECT_GROUPING: true 76 | # R-CNN 77 | # rcnn rois batch size 78 | BATCH_ROIS: -1 79 | BATCH_ROIS_OHEM: 128 80 | # rcnn rois sampling params 81 | FG_FRACTION: 0.25 82 | FG_THRESH: 0.5 83 | BG_THRESH_HI: 0.5 84 | BG_THRESH_LO: 0.0 85 | # rcnn bounding box regression params 86 | BBOX_REGRESSION_THRESH: 0.5 87 | BBOX_WEIGHTS: 88 | - 1.0 89 | - 1.0 90 | - 1.0 91 | - 1.0 92 | 93 | # RPN anchor loader 94 | # rpn anchors batch size 95 | RPN_BATCH_SIZE: 256 96 | # rpn anchors sampling params 97 | RPN_FG_FRACTION: 0.5 98 | RPN_POSITIVE_OVERLAP: 0.7 99 | RPN_NEGATIVE_OVERLAP: 0.3 100 | RPN_CLOBBER_POSITIVES: false 101 | # rpn bounding box regression params 102 | RPN_BBOX_WEIGHTS: 103 | - 1.0 104 | - 1.0 105 | - 1.0 106 | - 1.0 107 | RPN_POSITIVE_WEIGHT: -1.0 108 | # used for end2end training 109 | # RPN proposal 110 | CXX_PROPOSAL: true 111 | RPN_NMS_THRESH: 0.7 112 | RPN_PRE_NMS_TOP_N: 6000 113 | RPN_POST_NMS_TOP_N: 300 114 | RPN_MIN_SIZE: 0 115 | # approximate bounding box regression 116 | BBOX_NORMALIZATION_PRECOMPUTED: true 117 | BBOX_MEANS: 118 | - 0.0 119 | - 0.0 120 | - 0.0 121 | - 0.0 122 | BBOX_STDS: 123 | - 0.1 124 | - 0.1 125 | - 0.2 126 | - 0.2 127 | TEST: 128 | # use rpn to generate proposal 129 | HAS_RPN: true 130 | # size of images for each device 131 | BATCH_IMAGES: 1 132 | # RPN proposal 133 | CXX_PROPOSAL: true 134 | RPN_NMS_THRESH: 0.7 135 | RPN_PRE_NMS_TOP_N: 6000 136 | RPN_POST_NMS_TOP_N: 300 137 | RPN_MIN_SIZE: 0 138 | # RCNN nms 139 | NMS: 0.3 140 | test_epoch: 2 141 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, 
os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import train_end2end 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_end2end.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /init.bat: -------------------------------------------------------------------------------- 1 | cd /d %~dp0 2 | mkdir .\output 3 | mkdir .\external\mxnet 4 | mkdir .\model\pretrained_model 5 | pause 6 | cd lib\bbox 7 | python setup_windows.py build_ext --inplace 8 | cd ..\nms 9 | python setup_windows.py build_ext --inplace 10 | python setup_windows_cuda.py build_ext --inplace 11 | cd ..\.. 12 | pause 13 | -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ./output 4 | mkdir -p ./external/mxnet 5 | mkdir -p ./model/pretrained_model 6 | 7 | cd lib/bbox 8 | python setup_linux.py build_ext --inplace 9 | cd ../nms 10 | python setup_linux.py build_ext --inplace 11 | cd ../.. 
12 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | # Based on: 9 | # py-faster-rcnn 10 | # Copyright (c) 2015 Microsoft 11 | # Licence under The MIT License 12 | # https://github.com/rbgirshick/py-faster-rcnn 13 | # -------------------------------------------------------- 14 | 15 | cimport cython 16 | import numpy as np 17 | cimport numpy as np 18 | 19 | DTYPE = np.float 20 | ctypedef np.float_t DTYPE_t 21 | 22 | def bbox_overlaps_cython( 23 | np.ndarray[DTYPE_t, ndim=2] boxes, 24 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 25 | """ 26 | Parameters 27 | ---------- 28 | boxes: (N, 4) ndarray of float 29 | query_boxes: (K, 4) ndarray of float 30 | Returns 31 | ------- 32 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 33 | """ 34 | cdef unsigned int N = boxes.shape[0] 35 | cdef unsigned int K = query_boxes.shape[0] 36 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 37 | cdef DTYPE_t iw, ih, box_area 38 | cdef DTYPE_t ua 39 | cdef unsigned int k, n 40 | for k in range(K): 41 | box_area = ( 42 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 43 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 44 | ) 45 | for n in range(N): 46 | iw = ( 47 | min(boxes[n, 2], query_boxes[k, 2]) - 48 | max(boxes[n, 0], query_boxes[k, 0]) + 1 49 | ) 50 | if iw > 0: 51 | ih = ( 52 | min(boxes[n, 3], query_boxes[k, 3]) - 53 | max(boxes[n, 1], query_boxes[k, 1]) + 1 54 | ) 55 | if ih > 0: 56 | ua = float( 57 | (boxes[n, 2] - boxes[n, 0] + 1) * 58 | (boxes[n, 3] - boxes[n, 1] + 1) + 59 | box_area - iw * ih 60 | ) 61 | overlaps[n, k] = iw * ih / ua 62 | return overlaps 63 | -------------------------------------------------------------------------------- 
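A minimal usage sketch of the overlap kernel above (an editorial illustration, not a file in the repository; it assumes bbox.pyx has been compiled in-place via lib/bbox/setup_linux.py or setup_windows.py so that the bbox module is importable, and uses the float64 DTYPE declared in the .pyx):

import numpy as np
from bbox import bbox_overlaps_cython  # the extension built from bbox.pyx

# two candidate boxes against one query box, in (x1, y1, x2, y2) order
boxes = np.array([[0, 0, 9, 9], [5, 5, 14, 14]], dtype=np.float64)
query_boxes = np.array([[0, 0, 9, 9]], dtype=np.float64)

overlaps = bbox_overlaps_cython(boxes, query_boxes)  # shape (N, K) = (2, 1)
# overlaps[0, 0] == 1.0 (identical boxes); overlaps[1, 0] == 25 / 175,
# the 5x5 intersection divided by the 100 + 100 - 25 union area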
/lib/bbox/bbox_regression.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | """ 15 | This file has functions about generating bounding box regression targets 16 | """ 17 | 18 | import numpy as np 19 | 20 | from bbox_transform import bbox_overlaps, bbox_transform 21 | 22 | 23 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 24 | """ 25 | given rois, overlaps, gt labels, compute bounding box regression targets 26 | :param rois: roidb[i]['boxes'] k * 4 27 | :param overlaps: roidb[i]['max_overlaps'] k * 1 28 | :param labels: roidb[i]['max_classes'] k * 1 29 | :return: targets[i][class, dx, dy, dw, dh] k * 5 30 | """ 31 | # Ensure ROIs are floats 32 | rois = rois.astype(np.float, copy=False) 33 | 34 | # Sanity check 35 | if len(rois) != len(overlaps): 36 | print 'bbox regression: this should not happen' 37 | 38 | # Indices of ground-truth ROIs 39 | gt_inds = np.where(overlaps == 1)[0] 40 | if len(gt_inds) == 0: 41 | print 'something wrong : zero ground truth rois' 42 | # Indices of examples for which we try to make predictions 43 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 44 | 45 | # Get IoU overlap between each ex ROI and gt ROI 46 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 47 | 48 | # Find which gt ROI each ex ROI has max overlap with: 49 | # this will be the ex ROI's gt target 50 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 51 | gt_rois = rois[gt_inds[gt_assignment], :] 52 | ex_rois = rois[ex_inds, :] 53 | 54 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 55 | targets[ex_inds, 0] = labels[ex_inds] 56 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 57 | return targets 58 | 59 | 60 | def add_bbox_regression_targets(roidb, cfg): 61 | """ 62 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 63 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 64 | :return: means, std variances of targets 65 | """ 66 | print 'add bounding box regression targets' 67 | assert len(roidb) > 0 68 | assert 'max_classes' in roidb[0] 69 | 70 | num_images = len(roidb) 71 | num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1] 72 | 73 | for im_i in range(num_images): 74 | rois = roidb[im_i]['boxes'] 75 | max_overlaps = roidb[im_i]['max_overlaps'] 76 | max_classes = roidb[im_i]['max_classes'] 77 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg) 78 | 79 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 80 | # use fixed / precomputed means and stds instead of empirical values 81 | means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1)) 82 | stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1)) 83 | else: 84 | # compute mean, std values 85 | class_counts = np.zeros((num_classes, 1)) + 1e-14 86 | sums = np.zeros((num_classes, 4)) 87 | squared_sums = np.zeros((num_classes, 4)) 88 | for im_i in range(num_images): 89 | targets = roidb[im_i]['bbox_targets'] 90 | for cls in range(1, num_classes): 91 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 92 | if cls_indexes.size > 0: 93 | class_counts[cls] += cls_indexes.size 94 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 95 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 96 | 97 | means = sums / class_counts 98 | # var(x) = E(x^2) - E(x)^2 99 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 100 | 101 | print 'bbox target means:' 102 | print means 103 | print means[1:, :].mean(axis=0) # ignore bg class 104 | print 'bbox target stdevs:' 105 | print stds 106 | print stds[1:, :].mean(axis=0) # ignore bg class 107 | 108 | 109 | # normalize targets 110 | for im_i in range(num_images): 111 | targets = roidb[im_i]['bbox_targets'] 112 | for cls in range(1, num_classes): 113 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 114 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 115 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 116 | 117 | return means.ravel(), stds.ravel() 118 | 119 | 120 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg): 121 | """ 122 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 123 | :param bbox_targets_data: [k * 5] 124 | :param num_classes: number of classes 125 | :return: bbox target processed [k * 4 num_classes] 126 | bbox_weights ! only foreground boxes have bbox regression computation!
127 | """ 128 | classes = bbox_targets_data[:, 0] 129 | if cfg.CLASS_AGNOSTIC: 130 | num_classes = 2 131 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 132 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 133 | indexes = np.where(classes > 0)[0] 134 | for index in indexes: 135 | cls = classes[index] 136 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 137 | end = start + 4 138 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 139 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 140 | return bbox_targets, bbox_weights 141 | 142 | -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | 15 | import os 16 | from os.path import join as pjoin 17 | from setuptools import setup 18 | from distutils.extension import Extension 19 | from Cython.Distutils import build_ext 20 | import numpy as np 21 | 22 | # Obtain the numpy include directory. This logic works across numpy versions. 23 | try: 24 | numpy_include = np.get_include() 25 | except AttributeError: 26 | numpy_include = np.get_numpy_include() 27 | 28 | 29 | def customize_compiler_for_nvcc(self): 30 | """inject deep into distutils to customize how the dispatch 31 | to gcc/nvcc works. 32 | If you subclass UnixCCompiler, it's not trivial to get your subclass 33 | injected in, and still have the right customizations (i.e. 34 | distutils.sysconfig.customize_compiler) run on it. So instead of going 35 | the OO route, I have this. Note, it's kindof like a wierd functional 36 | subclassing going on.""" 37 | 38 | # tell the compiler it can processes .cu 39 | self.src_extensions.append('.cu') 40 | 41 | # save references to the default compiler_so and _comple methods 42 | default_compiler_so = self.compiler_so 43 | super = self._compile 44 | 45 | # now redefine the _compile method. This gets executed for each 46 | # object but distutils doesn't have the ability to change compilers 47 | # based on source extension: we add it. 
48 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 49 | if os.path.splitext(src)[1] == '.cu': 50 | # use the cuda for .cu files 51 | self.set_executable('compiler_so', CUDA['nvcc']) 52 | # use only a subset of the extra_postargs, which are 1-1 translated 53 | # from the extra_compile_args in the Extension class 54 | postargs = extra_postargs['nvcc'] 55 | else: 56 | postargs = extra_postargs['gcc'] 57 | 58 | super(obj, src, ext, cc_args, postargs, pp_opts) 59 | # reset the default compiler_so, which we might have changed for cuda 60 | self.compiler_so = default_compiler_so 61 | 62 | # inject our redefined _compile method into the class 63 | self._compile = _compile 64 | 65 | 66 | # run the customize_compiler 67 | class custom_build_ext(build_ext): 68 | def build_extensions(self): 69 | customize_compiler_for_nvcc(self.compiler) 70 | build_ext.build_extensions(self) 71 | 72 | 73 | ext_modules = [ 74 | Extension( 75 | "bbox", 76 | ["bbox.pyx"], 77 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 78 | include_dirs=[numpy_include] 79 | ), 80 | ] 81 | 82 | setup( 83 | name='bbox_cython', 84 | ext_modules=ext_modules, 85 | # inject our custom trigger 86 | cmdclass={'build_ext': custom_build_ext}, 87 | ) 88 | -------------------------------------------------------------------------------- /lib/bbox/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | import numpy as np 15 | import os 16 | from os.path import join as pjoin 17 | #from distutils.core import setup 18 | from setuptools import setup 19 | from distutils.extension import Extension 20 | from Cython.Distutils import build_ext 21 | import subprocess 22 | 23 | #change for windows, by MrX 24 | nvcc_bin = 'nvcc.exe' 25 | lib_dir = 'lib/x64' 26 | 27 | import distutils.msvc9compiler 28 | distutils.msvc9compiler.VERSION = 14.0 29 | 30 | # Obtain the numpy include directory. This logic works across numpy versions. 
31 | try: 32 | numpy_include = np.get_include() 33 | except AttributeError: 34 | numpy_include = np.get_numpy_include() 35 | 36 | ext_modules = [ 37 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 38 | Extension( 39 | "bbox", 40 | sources=["bbox.pyx"], 41 | extra_compile_args={}, 42 | include_dirs = [numpy_include] 43 | ), 44 | ] 45 | 46 | setup( 47 | name='fast_rcnn', 48 | ext_modules=ext_modules, 49 | # inject our custom trigger 50 | cmdclass={'build_ext': build_ext}, 51 | ) 52 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from imdb import IMDB 2 | from imagenet_vid import ImageNetVID 3 | -------------------------------------------------------------------------------- /lib/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def unique_boxes(boxes, scale=1.0): 12 | """ return indices of unique boxes """ 13 | v = np.array([1, 1e3, 1e6, 1e9]) 14 | hashes = np.round(boxes * scale).dot(v) 15 | _, index = np.unique(hashes, return_index=True) 16 | return np.sort(index) 17 | 18 | 19 | def filter_small_boxes(boxes, min_size): 20 | w = boxes[:, 2] - boxes[:, 0] 21 | h = boxes[:, 3] - boxes[:, 1] 22 | keep = np.where((w >= min_size) & (h >= min_size))[0] 23 | return keep -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | import numpy as np 15 | cimport numpy as np 16 | 17 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 18 | return a if a >= b else b 19 | 20 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 21 | return a if a <= b else b 22 | 23 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 24 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 25 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 26 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 27 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 28 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 29 | 30 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 31 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 32 | 33 | cdef int ndets = dets.shape[0]
34 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 35 | np.zeros((ndets), dtype=np.int) 36 | 37 | # nominal indices 38 | cdef int _i, _j 39 | # sorted indices 40 | cdef int i, j 41 | # temp variables for box i's (the box currently under consideration) 42 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 43 | # variables for computing overlap with box j (lower scoring box) 44 | cdef np.float32_t xx1, yy1, xx2, yy2 45 | cdef np.float32_t w, h 46 | cdef np.float32_t inter, ovr 47 | 48 | keep = [] 49 | for _i in range(ndets): 50 | i = order[_i] 51 | if suppressed[i] == 1: 52 | continue 53 | keep.append(i) 54 | ix1 = x1[i] 55 | iy1 = y1[i] 56 | ix2 = x2[i] 57 | iy2 = y2[i] 58 | iarea = areas[i] 59 | for _j in range(_i + 1, ndets): 60 | j = order[_j] 61 | if suppressed[j] == 1: 62 | continue 63 | xx1 = max(ix1, x1[j]) 64 | yy1 = max(iy1, y1[j]) 65 | xx2 = min(ix2, x2[j]) 66 | yy2 = min(iy2, y2[j]) 67 | w = max(0.0, xx2 - xx1 + 1) 68 | h = max(0.0, yy2 - yy1 + 1) 69 | inter = w * h 70 | ovr = inter / (iarea + areas[j] - inter) 71 | if ovr >= thresh: 72 | suppressed[j] = 1 73 | 74 | return keep 75 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deep Feature Flow 3 | // Copyright (c) 2017 Microsoft 4 | // Licensed under The MIT License 5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | // ------------------------------------------------------------------ 7 | // Based on: 8 | // Faster R-CNN 9 | // Copyright (c) 2015 Microsoft 10 | // Licensed under The MIT License 11 | // https://github.com/shaoqingren/faster_rcnn 12 | // ------------------------------------------------------------------ 13 | 14 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 15 | int boxes_dim, float nms_overlap_thresh, int device_id); 16 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------ 2 | # Deep Feature Flow 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License 5 | # Written by Yuwen Xiong, Tao Kong 6 | # ------------------------------------------------------------------ 7 | # Based on: 8 | # Faster R-CNN 9 | # Copyright (c) 2015 Microsoft 10 | # Licensed under The MIT License 11 | # https://github.com/shaoqingren/faster_rcnn 12 | # ------------------------------------------------------------------ 13 | 14 | import numpy as np 15 | cimport numpy as np 16 | 17 | assert sizeof(int) == sizeof(np.int32_t) 18 | 19 | cdef extern from "gpu_nms.hpp": 20 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 21 | 22 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 23 | np.int32_t device_id=0): 24 | cdef int boxes_num = dets.shape[0] 25 | cdef int boxes_dim = dets.shape[1] 26 | cdef int num_out 27 | cdef np.ndarray[np.int32_t, ndim=1] \ 28 | keep = np.zeros(boxes_num, dtype=np.int32) 29 | cdef np.ndarray[np.float32_t, ndim=1] \ 30 | scores = dets[:, 4] 31 | cdef np.ndarray[np.int32_t, ndim=1] \ 32 | order = scores.argsort()[::-1].astype(np.int32) 33 | cdef np.ndarray[np.float32_t, ndim=2] \ 34 | sorted_dets = dets[order, :] 35 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, 
thresh, device_id) 36 | keep = keep[:num_out] 37 | return list(order[keep]) 38 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | import numpy as np 15 | 16 | from cpu_nms import cpu_nms 17 | from gpu_nms import gpu_nms 18 | 19 | def py_nms_wrapper(thresh): 20 | def _nms(dets): 21 | return nms(dets, thresh) 22 | return _nms 23 | 24 | 25 | def cpu_nms_wrapper(thresh): 26 | def _nms(dets): 27 | return cpu_nms(dets, thresh) 28 | return _nms 29 | 30 | 31 | def gpu_nms_wrapper(thresh, device_id): 32 | def _nms(dets): 33 | return gpu_nms(dets, thresh, device_id) 34 | return _nms 35 | 36 | 37 | def nms(dets, thresh): 38 | """ 39 | greedily select boxes with high confidence and overlap with current maximum <= thresh 40 | rule out overlap >= thresh 41 | :param dets: [[x1, y1, x2, y2, score]] 42 | :param thresh: retain overlap < thresh 43 | :return: indexes to keep 44 | """ 45 | if dets.shape[0] == 0: 46 | return [] 47 | 48 | x1 = dets[:, 0] 49 | y1 = dets[:, 1] 50 | x2 = dets[:, 2] 51 | y2 = dets[:, 3] 52 | scores = dets[:, 4] 53 | 54 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 55 | order = scores.argsort()[::-1] 56 | 57 | keep = [] 58 | while order.size > 0: 59 | i = order[0] 60 | keep.append(i) 61 | xx1 = np.maximum(x1[i], x1[order[1:]]) 62 | yy1 = np.maximum(y1[i], y1[order[1:]]) 63 | xx2 = np.minimum(x2[i], x2[order[1:]]) 64 | yy2 = np.minimum(y2[i], y2[order[1:]]) 65 | 66 | w = np.maximum(0.0, xx2 - xx1 + 1) 67 | h = np.maximum(0.0, yy2 - yy1 + 1) 68 | inter = w * h 69 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 70 | 71 | inds = np.where(ovr <= thresh)[0] 72 | order = order[inds + 1] 73 | 74 | return keep 75 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deep Feature Flow 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License 5 | // Written by Yuwen Xiong 6 | // ------------------------------------------------------------------ 7 | // Based on: 8 | // Faster R-CNN 9 | // Copyright (c) 2015 Microsoft 10 | // Licensed under The MIT License 11 | // https://github.com/shaoqingren/faster_rcnn 12 | // ------------------------------------------------------------------ 13 | 14 | #include "gpu_nms.hpp" 15 | #include <vector> 16 | #include <iostream> 17 | 18 | #define CUDA_CHECK(condition) \ 19 | /* Code block avoids redefinition of cudaError_t error */ \ 20 | do { \ 21 | cudaError_t error = condition; \ 22 | if (error != cudaSuccess) { \ 23 | std::cout << cudaGetErrorString(error) << std::endl; \ 24 | } \ 25 | } while (0) 26 | 27 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 28 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 29 | 30 | __device__ inline float devIoU(float const * const a, float const * const b) { 31 | float left =
max(a[0], b[0]), right = min(a[2], b[2]); 32 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 33 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 34 | float interS = width * height; 35 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 36 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 37 | return interS / (Sa + Sb - interS); 38 | } 39 | 40 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 41 | const float *dev_boxes, unsigned long long *dev_mask) { 42 | const int row_start = blockIdx.y; 43 | const int col_start = blockIdx.x; 44 | 45 | // if (row_start > col_start) return; 46 | 47 | const int row_size = 48 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 49 | const int col_size = 50 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 51 | 52 | __shared__ float block_boxes[threadsPerBlock * 5]; 53 | if (threadIdx.x < col_size) { 54 | block_boxes[threadIdx.x * 5 + 0] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 56 | block_boxes[threadIdx.x * 5 + 1] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 58 | block_boxes[threadIdx.x * 5 + 2] = 59 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 60 | block_boxes[threadIdx.x * 5 + 3] = 61 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 62 | block_boxes[threadIdx.x * 5 + 4] = 63 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 64 | } 65 | __syncthreads(); 66 | 67 | if (threadIdx.x < row_size) { 68 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 69 | const float *cur_box = dev_boxes + cur_box_idx * 5; 70 | int i = 0; 71 | unsigned long long t = 0; 72 | int start = 0; 73 | if (row_start == col_start) { 74 | start = threadIdx.x + 1; 75 | } 76 | for (i = start; i < col_size; i++) { 77 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 78 | t |= 1ULL << i; 79 | } 80 | } 81 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 82 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 83 | } 84 | } 85 | 86 | void _set_device(int device_id) { 87 | int current_device; 88 | CUDA_CHECK(cudaGetDevice(¤t_device)); 89 | if (current_device == device_id) { 90 | return; 91 | } 92 | // The call to cudaSetDevice must come before any calls to Get, which 93 | // may perform initialization using the GPU. 
94 | CUDA_CHECK(cudaSetDevice(device_id)); 95 | } 96 | 97 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 98 | int boxes_dim, float nms_overlap_thresh, int device_id) { 99 | _set_device(device_id); 100 | 101 | float* boxes_dev = NULL; 102 | unsigned long long* mask_dev = NULL; 103 | 104 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 105 | 106 | CUDA_CHECK(cudaMalloc(&boxes_dev, 107 | boxes_num * boxes_dim * sizeof(float))); 108 | CUDA_CHECK(cudaMemcpy(boxes_dev, 109 | boxes_host, 110 | boxes_num * boxes_dim * sizeof(float), 111 | cudaMemcpyHostToDevice)); 112 | 113 | CUDA_CHECK(cudaMalloc(&mask_dev, 114 | boxes_num * col_blocks * sizeof(unsigned long long))); 115 | 116 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 117 | DIVUP(boxes_num, threadsPerBlock)); 118 | dim3 threads(threadsPerBlock); 119 | nms_kernel<<<blocks, threads>>>(boxes_num, 120 | nms_overlap_thresh, 121 | boxes_dev, 122 | mask_dev); 123 | 124 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 125 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 126 | mask_dev, 127 | sizeof(unsigned long long) * boxes_num * col_blocks, 128 | cudaMemcpyDeviceToHost)); 129 | 130 | std::vector<unsigned long long> remv(col_blocks); 131 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 132 | 133 | int num_to_keep = 0; 134 | for (int i = 0; i < boxes_num; i++) { 135 | int nblock = i / threadsPerBlock; 136 | int inblock = i % threadsPerBlock; 137 | 138 | if (!(remv[nblock] & (1ULL << inblock))) { 139 | keep_out[num_to_keep++] = i; 140 | unsigned long long *p = &mask_host[0] + i * col_blocks; 141 | for (int j = nblock; j < col_blocks; j++) { 142 | remv[j] |= p[j]; 143 | } 144 | } 145 | } 146 | *num_out = num_to_keep; 147 | 148 | CUDA_CHECK(cudaFree(boxes_dev)); 149 | CUDA_CHECK(cudaFree(mask_dev)); 150 | } 151 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | import os 15 | from os.path import join as pjoin 16 | from setuptools import setup 17 | from distutils.extension import Extension 18 | from Cython.Distutils import build_ext 19 | import numpy as np 20 | 21 | 22 | def find_in_path(name, path): 23 | "Find a file in a search path" 24 | # Adapted from 25 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 26 | for dir in path.split(os.pathsep): 27 | binpath = pjoin(dir, name) 28 | if os.path.exists(binpath): 29 | return os.path.abspath(binpath) 30 | return None 31 | 32 | 33 | def locate_cuda(): 34 | """Locate the CUDA environment on the system 35 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 36 | and values giving the absolute path to each directory. 37 | Starts by looking for the CUDAHOME env variable. If not found, everything 38 | is based on finding 'nvcc' in the PATH.
39 | """ 40 | 41 | # first check if the CUDAHOME env variable is in use 42 | if 'CUDAHOME' in os.environ: 43 | home = os.environ['CUDAHOME'] 44 | nvcc = pjoin(home, 'bin', 'nvcc') 45 | else: 46 | # otherwise, search the PATH for NVCC 47 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 48 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 49 | if nvcc is None: 50 | raise EnvironmentError('The nvcc binary could not be ' 51 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 52 | home = os.path.dirname(os.path.dirname(nvcc)) 53 | 54 | cudaconfig = {'home':home, 'nvcc':nvcc, 55 | 'include': pjoin(home, 'include'), 56 | 'lib64': pjoin(home, 'lib64')} 57 | for k, v in cudaconfig.iteritems(): 58 | if not os.path.exists(v): 59 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 60 | 61 | return cudaconfig 62 | CUDA = locate_cuda() 63 | 64 | 65 | # Obtain the numpy include directory. This logic works across numpy versions. 66 | try: 67 | numpy_include = np.get_include() 68 | except AttributeError: 69 | numpy_include = np.get_numpy_include() 70 | 71 | 72 | def customize_compiler_for_nvcc(self): 73 | """inject deep into distutils to customize how the dispatch 74 | to gcc/nvcc works. 75 | If you subclass UnixCCompiler, it's not trivial to get your subclass 76 | injected in, and still have the right customizations (i.e. 77 | distutils.sysconfig.customize_compiler) run on it. So instead of going 78 | the OO route, I have this. Note, it's kindof like a wierd functional 79 | subclassing going on.""" 80 | 81 | # tell the compiler it can processes .cu 82 | self.src_extensions.append('.cu') 83 | 84 | # save references to the default compiler_so and _comple methods 85 | default_compiler_so = self.compiler_so 86 | super = self._compile 87 | 88 | # now redefine the _compile method. This gets executed for each 89 | # object but distutils doesn't have the ability to change compilers 90 | # based on source extension: we add it. 
91 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 92 | if os.path.splitext(src)[1] == '.cu': 93 | # use the cuda for .cu files 94 | self.set_executable('compiler_so', CUDA['nvcc']) 95 | # use only a subset of the extra_postargs, which are 1-1 translated 96 | # from the extra_compile_args in the Extension class 97 | postargs = extra_postargs['nvcc'] 98 | else: 99 | postargs = extra_postargs['gcc'] 100 | 101 | super(obj, src, ext, cc_args, postargs, pp_opts) 102 | # reset the default compiler_so, which we might have changed for cuda 103 | self.compiler_so = default_compiler_so 104 | 105 | # inject our redefined _compile method into the class 106 | self._compile = _compile 107 | 108 | 109 | # run the customize_compiler 110 | class custom_build_ext(build_ext): 111 | def build_extensions(self): 112 | customize_compiler_for_nvcc(self.compiler) 113 | build_ext.build_extensions(self) 114 | 115 | 116 | ext_modules = [ 117 | Extension( 118 | "cpu_nms", 119 | ["cpu_nms.pyx"], 120 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 121 | include_dirs = [numpy_include] 122 | ), 123 | Extension('gpu_nms', 124 | ['nms_kernel.cu', 'gpu_nms.pyx'], 125 | library_dirs=[CUDA['lib64']], 126 | libraries=['cudart'], 127 | language='c++', 128 | runtime_library_dirs=[CUDA['lib64']], 129 | # this syntax is specific to this build system 130 | # we're only going to use certain compiler args with nvcc and not with 131 | # gcc; the implementation of this trick is in customize_compiler() below 132 | extra_compile_args={'gcc': ["-Wno-unused-function"], 133 | 'nvcc': ['-arch=sm_35', 134 | '--ptxas-options=-v', 135 | '-c', 136 | '--compiler-options', 137 | "'-fPIC'"]}, 138 | include_dirs = [numpy_include, CUDA['include']] 139 | ), 140 | ] 141 | 142 | setup( 143 | name='nms', 144 | ext_modules=ext_modules, 145 | # inject our custom trigger 146 | cmdclass={'build_ext': custom_build_ext}, 147 | ) 148 | -------------------------------------------------------------------------------- /lib/nms/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2015 Microsoft 10 | # Licence under The MIT License 11 | # https://github.com/rbgirshick/py-faster-rcnn 12 | # -------------------------------------------------------- 13 | 14 | import numpy as np 15 | import os 16 | from os.path import join as pjoin 17 | #from distutils.core import setup 18 | from setuptools import setup 19 | from distutils.extension import Extension 20 | from Cython.Distutils import build_ext 21 | import subprocess 22 | 23 | #change for windows, by MrX 24 | nvcc_bin = 'nvcc.exe' 25 | lib_dir = 'lib/x64' 26 | 27 | import distutils.msvc9compiler 28 | distutils.msvc9compiler.VERSION = 14.0 29 | 30 | 31 | def find_in_path(name, path): 32 | "Find a file in a search path" 33 | # Adapted from 34 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 35 | for dir in path.split(os.pathsep): 36 | binpath = pjoin(dir, name) 37 | if os.path.exists(binpath): 38 | return os.path.abspath(binpath) 39 | return None 40 | 41 | 42 | def locate_cuda(): 43 | """Locate the CUDA environment on the system 44 | 45 | Returns a dict with keys 'home', 'nvcc', 'include',
and 'lib64' 46 | and values giving the absolute path to each directory. 47 | 48 | Starts by looking for the CUDA_PATH env variable. If not found, everything 49 | is based on finding 'nvcc' in the PATH. 50 | """ 51 | 52 | # first check if the CUDA_PATH env variable is in use 53 | if 'CUDA_PATH' in os.environ: 54 | home = os.environ['CUDA_PATH'] 55 | print("home = %s\n" % home) 56 | nvcc = pjoin(home, 'bin', nvcc_bin) 57 | else: 58 | # otherwise, search the PATH for NVCC 59 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 60 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path) 61 | if nvcc is None: 62 | raise EnvironmentError('The nvcc binary could not be ' 63 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') 64 | home = os.path.dirname(os.path.dirname(nvcc)) 65 | print("home = %s, nvcc = %s\n" % (home, nvcc)) 66 | 67 | 68 | cudaconfig = {'home':home, 'nvcc':nvcc, 69 | 'include': pjoin(home, 'include'), 70 | 'lib64': pjoin(home, lib_dir)} 71 | for k, v in cudaconfig.iteritems(): 72 | if not os.path.exists(v): 73 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 74 | 75 | return cudaconfig 76 | CUDA = locate_cuda() 77 | 78 | 79 | # Obtain the numpy include directory. This logic works across numpy versions. 80 | try: 81 | numpy_include = np.get_include() 82 | except AttributeError: 83 | numpy_include = np.get_numpy_include() 84 | 85 | 86 | def customize_compiler_for_nvcc(self): 87 | """inject deep into distutils to customize how the dispatch 88 | to gcc/nvcc works. 89 | 90 | If you subclass UnixCCompiler, it's not trivial to get your subclass 91 | injected in, and still have the right customizations (i.e. 92 | distutils.sysconfig.customize_compiler) run on it. So instead of going 93 | the OO route, I have this. Note, it's kind of like a weird functional 94 | subclassing going on.""" 95 | 96 | # tell the compiler it can process .cu 97 | #self.src_extensions.append('.cu') 98 | 99 | 100 | # save references to the default compiler_so and _compile methods 101 | #default_compiler_so = self.spawn 102 | #default_compiler_so = self.rc 103 | super = self.compile 104 | 105 | # now redefine the _compile method. This gets executed for each 106 | # object but distutils doesn't have the ability to change compilers 107 | # based on source extension: we add it.
108 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 109 | postfix=os.path.splitext(sources[0])[1] 110 | 111 | if postfix == '.cu': 112 | # use the cuda for .cu files 113 | #self.set_executable('compiler_so', CUDA['nvcc']) 114 | # use only a subset of the extra_postargs, which are 1-1 translated 115 | # from the extra_compile_args in the Extension class 116 | postargs = extra_postargs['nvcc'] 117 | else: 118 | postargs = extra_postargs['gcc'] 119 | 120 | 121 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 122 | # reset the default compiler_so, which we might have changed for cuda 123 | #self.rc = default_compiler_so 124 | 125 | # inject our redefined _compile method into the class 126 | self.compile = compile 127 | 128 | 129 | # run the customize_compiler 130 | class custom_build_ext(build_ext): 131 | def build_extensions(self): 132 | customize_compiler_for_nvcc(self.compiler) 133 | build_ext.build_extensions(self) 134 | 135 | 136 | ext_modules = [ 137 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 138 | Extension( 139 | "cpu_nms", 140 | sources=["cpu_nms.pyx"], 141 | extra_compile_args={'gcc': []}, 142 | include_dirs = [numpy_include], 143 | ), 144 | ] 145 | 146 | setup( 147 | name='fast_rcnn', 148 | ext_modules=ext_modules, 149 | # inject our custom trigger 150 | cmdclass={'build_ext': custom_build_ext}, 151 | ) 152 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | """ 15 | Generate base anchors on index 0 16 | """ 17 | 18 | import numpy as np 19 | 20 | 21 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 22 | scales=2 ** np.arange(3, 6)): 23 | """ 24 | Generate anchor (reference) windows by enumerating aspect ratios X 25 | scales wrt a reference (0, 0, 15, 15) window. 26 | """ 27 | 28 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 29 | ratio_anchors = _ratio_enum(base_anchor, ratios) 30 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 31 | for i in xrange(ratio_anchors.shape[0])]) 32 | return anchors 33 | 34 | 35 | def _whctrs(anchor): 36 | """ 37 | Return width, height, x center, and y center for an anchor (window). 
38 | """ 39 | 40 | w = anchor[2] - anchor[0] + 1 41 | h = anchor[3] - anchor[1] + 1 42 | x_ctr = anchor[0] + 0.5 * (w - 1) 43 | y_ctr = anchor[1] + 0.5 * (h - 1) 44 | return w, h, x_ctr, y_ctr 45 | 46 | 47 | def _mkanchors(ws, hs, x_ctr, y_ctr): 48 | """ 49 | Given a vector of widths (ws) and heights (hs) around a center 50 | (x_ctr, y_ctr), output a set of anchors (windows). 51 | """ 52 | 53 | ws = ws[:, np.newaxis] 54 | hs = hs[:, np.newaxis] 55 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 56 | y_ctr - 0.5 * (hs - 1), 57 | x_ctr + 0.5 * (ws - 1), 58 | y_ctr + 0.5 * (hs - 1))) 59 | return anchors 60 | 61 | 62 | def _ratio_enum(anchor, ratios): 63 | """ 64 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 65 | """ 66 | 67 | w, h, x_ctr, y_ctr = _whctrs(anchor) 68 | size = w * h 69 | size_ratios = size / ratios 70 | ws = np.round(np.sqrt(size_ratios)) 71 | hs = np.round(ws * ratios) 72 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 73 | return anchors 74 | 75 | 76 | def _scale_enum(anchor, scales): 77 | """ 78 | Enumerate a set of anchors for each scale wrt an anchor. 79 | """ 80 | 81 | w, h, x_ctr, y_ctr = _whctrs(anchor) 82 | ws = w * scales 83 | hs = h * scales 84 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 85 | return anchors 86 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import mxnet as mx 15 | from mxnet.io import DataDesc, DataBatch 16 | import threading 17 | 18 | 19 | class PrefetchingIter(mx.io.DataIter): 20 | """Base class for prefetching iterators. Takes one or more DataIters ( 21 | or any class with "reset" and "next" methods) and combine them with 22 | prefetching. For example: 23 | 24 | Parameters 25 | ---------- 26 | iters : DataIter or list of DataIter 27 | one or more DataIters (or any class with "reset" and "next" methods) 28 | rename_data : None or list of dict 29 | i-th element is a renaming map for i-th iter, in the form of 30 | {'original_name' : 'new_name'}. 
Should have one entry for each entry 31 | in iter[i].provide_data 32 | rename_label : None or list of dict 33 | Similar to rename_data 34 | 35 | Examples 36 | -------- 37 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 38 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 39 | """ 40 | def __init__(self, iters, rename_data=None, rename_label=None): 41 | super(PrefetchingIter, self).__init__() 42 | if not isinstance(iters, list): 43 | iters = [iters] 44 | self.n_iter = len(iters) 45 | assert self.n_iter ==1, "Our prefetching iter only support 1 DataIter" 46 | self.iters = iters 47 | self.rename_data = rename_data 48 | self.rename_label = rename_label 49 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 50 | self.data_ready = [threading.Event() for i in range(self.n_iter)] 51 | self.data_taken = [threading.Event() for i in range(self.n_iter)] 52 | for e in self.data_taken: 53 | e.set() 54 | self.started = True 55 | self.current_batch = [None for _ in range(self.n_iter)] 56 | self.next_batch = [None for _ in range(self.n_iter)] 57 | def prefetch_func(self, i): 58 | """Thread entry""" 59 | while True: 60 | self.data_taken[i].wait() 61 | if not self.started: 62 | break 63 | try: 64 | self.next_batch[i] = self.iters[i].next() 65 | except StopIteration: 66 | self.next_batch[i] = None 67 | self.data_taken[i].clear() 68 | self.data_ready[i].set() 69 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 70 | for i in range(self.n_iter)] 71 | for thread in self.prefetch_threads: 72 | thread.setDaemon(True) 73 | thread.start() 74 | 75 | def __del__(self): 76 | self.started = False 77 | for e in self.data_taken: 78 | e.set() 79 | for thread in self.prefetch_threads: 80 | thread.join() 81 | 82 | @property 83 | def provide_data(self): 84 | """The name and shape of data provided by this iterator""" 85 | if self.rename_data is None: 86 | return sum([i.provide_data for i in self.iters], []) 87 | else: 88 | return sum([[ 89 | DataDesc(r[x.name], x.shape, x.dtype) 90 | if isinstance(x, DataDesc) else DataDesc(*x) 91 | for x in i.provide_data 92 | ] for r, i in zip(self.rename_data, self.iters)], []) 93 | 94 | @property 95 | def provide_label(self): 96 | """The name and shape of label provided by this iterator""" 97 | if self.rename_label is None: 98 | return sum([i.provide_label for i in self.iters], []) 99 | else: 100 | return sum([[ 101 | DataDesc(r[x.name], x.shape, x.dtype) 102 | if isinstance(x, DataDesc) else DataDesc(*x) 103 | for x in i.provide_label 104 | ] for r, i in zip(self.rename_label, self.iters)], []) 105 | 106 | def reset(self): 107 | for e in self.data_ready: 108 | e.wait() 109 | for i in self.iters: 110 | i.reset() 111 | for e in self.data_ready: 112 | e.clear() 113 | for e in self.data_taken: 114 | e.set() 115 | 116 | def iter_next(self): 117 | for e in self.data_ready: 118 | e.wait() 119 | if self.next_batch[0] is None: 120 | return False 121 | else: 122 | self.current_batch = self.next_batch[0] 123 | for e in self.data_ready: 124 | e.clear() 125 | for e in self.data_taken: 126 | e.set() 127 | return True 128 | 129 | def next(self): 130 | if self.iter_next(): 131 | return self.current_batch 132 | else: 133 | raise StopIteration 134 | 135 | def getdata(self): 136 | return self.current_batch.data 137 | 138 | def getlabel(self): 139 | return self.current_batch.label 140 | 141 | def getindex(self): 142 | return self.current_batch.index 143 | 144 | def getpad(self): 145 | return self.current_batch.pad 
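# Editorial usage sketch (not part of this file): assuming an MXNet 0.x/1.x
# Python 2 environment, PrefetchingIter wraps a single DataIter and fetches the
# next batch on a daemon thread while the current one is being consumed; here a
# toy NDArrayIter stands in for the project's real training loader.
import mxnet as mx
import numpy as np

base_iter = mx.io.NDArrayIter(data=np.zeros((4, 3, 600, 1000), dtype=np.float32),
                              batch_size=1)
train_iter = PrefetchingIter(base_iter)
for batch in train_iter:  # each batch was prefetched one step ahead
    pass                  # forward/backward would run here
train_iter.reset()        # rewinds the wrapped iterator and the sync events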
146 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | from load_model import load_checkpoint 9 | from save_model import save_checkpoint 10 | 11 | 12 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 13 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 14 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 15 | arg_names = args1.keys() + args2.keys() 16 | aux_names = auxs1.keys() + auxs2.keys() 17 | args = dict() 18 | for arg in arg_names: 19 | if arg in args1: 20 | args[arg] = args1[arg] 21 | if arg in args2: 22 | args[arg] = args2[arg] 23 | auxs = dict() 24 | for aux in aux_names: 25 | if aux in auxs1: 26 | auxs[aux] = auxs1[aux] 27 | if aux in auxs2: 28 | auxs[aux] = auxs2[aux] 29 | save_checkpoint(prefix_out, epoch_out, args, auxs) 30 | -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bin Xiao 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import logging 10 | import time 11 | 12 | def create_logger(root_output_path, cfg, image_set): 13 | # set up logger 14 | if not os.path.exists(root_output_path): 15 | os.makedirs(root_output_path) 16 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 17 | 18 | cfg_name = os.path.basename(cfg).split('.')[0] 19 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 20 | if not os.path.exists(config_output_path): 21 | os.makedirs(config_output_path) 22 | 23 | image_sets = [iset for iset in image_set.split('+')] 24 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 25 | if not os.path.exists(final_output_path): 26 | os.makedirs(final_output_path) 27 | 28 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 29 | head = '%(asctime)-15s %(message)s' 30 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 31 | logger = logging.getLogger() 32 | logger.setLevel(logging.INFO) 33 | 34 | return logger, final_output_path 35 | 36 | -------------------------------------------------------------------------------- /lib/utils/image_processing.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # 
-------------------------------------------------------- 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | 12 | def resize(im, target_size, max_size): 13 | """ 14 | only resize input image to target size and return scale 15 | :param im: BGR image input by opencv 16 | :param target_size: one dimensional size (the short side) 17 | :param max_size: one dimensional max size (the long side) 18 | :return: 19 | """ 20 | im_shape = im.shape 21 | im_size_min = np.min(im_shape[0:2]) 22 | im_size_max = np.max(im_shape[0:2]) 23 | im_scale = float(target_size) / float(im_size_min) 24 | # prevent bigger axis from being more than max_size: 25 | if np.round(im_scale * im_size_max) > max_size: 26 | im_scale = float(max_size) / float(im_size_max) 27 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 28 | return im, im_scale 29 | 30 | 31 | def transform(im, pixel_means, need_mean=False): 32 | """ 33 | transform into mxnet tensor 34 | subtract pixel size and transform to correct format 35 | :param im: [height, width, channel] in BGR 36 | :param pixel_means: [[[R, G, B pixel means]]] 37 | :return: [batch, channel, height, width] 38 | """ 39 | assert False, "shouldn't reach here." 40 | im = im.copy() 41 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] 42 | im = im.astype(float) 43 | if need_mean: 44 | im -= pixel_means 45 | im_tensor = im[np.newaxis, :] 46 | # put channel first 47 | channel_swap = (0, 3, 1, 2) 48 | im_tensor = im_tensor.transpose(channel_swap) 49 | return im_tensor 50 | 51 | 52 | def transform_inverse(im_tensor, pixel_means): 53 | """ 54 | transform from mxnet im_tensor to ordinary RGB image 55 | im_tensor is limited to one image 56 | :param im_tensor: [batch, channel, height, width] 57 | :param pixel_means: [[[R, G, B pixel means]]] 58 | :return: im [height, width, channel(RGB)] 59 | """ 60 | assert im_tensor.shape[0] == 1 61 | im_tensor = im_tensor.copy() 62 | # put channel back 63 | channel_swap = (0, 2, 3, 1) 64 | im_tensor = im_tensor.transpose(channel_swap) 65 | im = im_tensor[0] 66 | assert im.shape[2] == 3 67 | im += pixel_means 68 | im = im.astype(np.uint8) 69 | return im 70 | 71 | 72 | def tensor_vstack(tensor_list, pad=0): 73 | """ 74 | vertically stack tensors 75 | :param tensor_list: list of tensor to be stacked vertically 76 | :param pad: label to pad with 77 | :return: tensor with max shape 78 | """ 79 | ndim = len(tensor_list[0].shape) 80 | if ndim == 1: 81 | return np.hstack(tensor_list) 82 | dimensions = [0] 83 | for dim in range(1, ndim): 84 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 85 | for ind, tensor in enumerate(tensor_list): 86 | pad_shape = [(0, 0)] 87 | for dim in range(1, ndim): 88 | pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) 89 | tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) 90 | all_tensor = np.vstack(tensor_list) 91 | return all_tensor 92 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from dataset import * 10 | 11 | 12 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, 
result_path=None, 13 | flip=False): 14 | """ load ground truth roidb """ 15 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 16 | roidb = imdb.gt_roidb() 17 | if flip: 18 | roidb = imdb.append_flipped_images(roidb) 19 | return roidb 20 | 21 | 22 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 23 | proposal='rpn', append_gt=True, flip=False): 24 | """ load proposal roidb (append_gt when training) """ 25 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 26 | 27 | gt_roidb = imdb.gt_roidb() 28 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt) 29 | if flip: 30 | roidb = imdb.append_flipped_images(roidb) 31 | return roidb 32 | 33 | 34 | def merge_roidb(roidbs): 35 | """ roidb are list, concat them together """ 36 | roidb = roidbs[0] 37 | for r in roidbs[1:]: 38 | roidb.extend(r) 39 | return roidb 40 | 41 | 42 | def filter_roidb(roidb, config): 43 | """ remove roidb entries without usable rois """ 44 | 45 | def is_valid(entry): 46 | """ valid images have at least 1 fg or bg roi """ 47 | overlaps = entry['max_overlaps'] 48 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 49 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 50 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 51 | return valid 52 | 53 | num = len(roidb) 54 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 55 | num_after = len(filtered_roidb) 56 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 57 | 58 | return filtered_roidb 59 | 60 | 61 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 62 | flip=False): 63 | """ load ground truth segdb """ 64 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 65 | segdb = imdb.gt_segdb() 66 | if flip: 67 | segdb = imdb.append_flipped_images_for_segmentation(segdb) 68 | return segdb 69 | 70 | 71 | def merge_segdb(segdbs): 72 | """ segdb are list, concat them together """ 73 | segdb = segdbs[0] 74 | for r in segdbs[1:]: 75 | segdb.extend(r) 76 | return segdb 77 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | 10 | 11 | def load_checkpoint(prefix, epoch): 12 | """ 13 | Load model checkpoint from file. 14 | :param prefix: Prefix of model name. 15 | :param epoch: Epoch number of model we would like to load. 16 | :return: (arg_params, aux_params) 17 | arg_params : dict of str to NDArray 18 | Model parameter, dict of name to NDArray of net's weights. 19 | aux_params : dict of str to NDArray 20 | Model parameter, dict of name to NDArray of net's auxiliary states. 
21 | """ 22 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 23 | arg_params = {} 24 | aux_params = {} 25 | for k, v in save_dict.items(): 26 | tp, name = k.split(':', 1) 27 | if tp == 'arg': 28 | arg_params[name] = v 29 | if tp == 'aux': 30 | aux_params[name] = v 31 | return arg_params, aux_params 32 | 33 | 34 | def convert_context(params, ctx): 35 | """ 36 | :param params: dict of str to NDArray 37 | :param ctx: the context to convert to 38 | :return: dict of str of NDArray with context ctx 39 | """ 40 | new_params = dict() 41 | for k, v in params.items(): 42 | new_params[k] = v.as_in_context(ctx) 43 | return new_params 44 | 45 | 46 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 47 | """ 48 | wrapper for load checkpoint 49 | :param prefix: Prefix of model name. 50 | :param epoch: Epoch number of model we would like to load. 51 | :param convert: reference model should be converted to GPU NDArray first 52 | :param ctx: if convert then ctx must be designated. 53 | :param process: model should drop any test 54 | :return: (arg_params, aux_params) 55 | """ 56 | arg_params, aux_params = load_checkpoint(prefix, epoch) 57 | if convert: 58 | if ctx is None: 59 | ctx = mx.cpu() 60 | arg_params = convert_context(arg_params, ctx) 61 | aux_params = convert_context(aux_params, ctx) 62 | if process: 63 | tests = [k for k in arg_params.keys() if '_test' in k] 64 | for test in tests: 65 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 66 | return arg_params, aux_params 67 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | 9 | import logging 10 | from mxnet.lr_scheduler import LRScheduler 11 | 12 | class WarmupMultiFactorScheduler(LRScheduler): 13 | """Reduce learning rate in factor at steps specified in a list 14 | 15 | Assume the weight has been updated by n times, then the learning rate will 16 | be 17 | 18 | base_lr * factor^(sum((step/n)<=1)) # step is an array 19 | 20 | Parameters 21 | ---------- 22 | step: list of int 23 | schedule learning rate after n updates 24 | factor: float 25 | the factor for reducing the learning rate 26 | """ 27 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 28 | super(WarmupMultiFactorScheduler, self).__init__() 29 | assert isinstance(step, list) and len(step) >= 1 30 | for i, _step in enumerate(step): 31 | if i != 0 and step[i] <= step[i-1]: 32 | raise ValueError("Schedule step must be an increasing integer list") 33 | if _step < 1: 34 | raise ValueError("Schedule step must be greater or equal than 1 round") 35 | if factor > 1.0: 36 | raise ValueError("Factor must be no more than 1 to make lr reduce") 37 | self.step = step 38 | self.cur_step_ind = 0 39 | self.factor = factor 40 | self.count = 0 41 | self.warmup = warmup 42 | self.warmup_lr = warmup_lr 43 | self.warmup_step = warmup_step 44 | 45 | def __call__(self, num_update): 46 | """ 47 | Call to schedule current learning rate 48 | 49 | Parameters 50 | ---------- 51 | num_update: int 52 | the maximal number of updates applied to a weight. 
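        Returns
        -------
        The current learning rate: warmup_lr while num_update < warmup_step
        (constant warmup), otherwise base_lr multiplied by factor once for
        every entry in step that num_update has already passed.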
53 | """ 54 | 55 | # NOTE: use while rather than if (for continuing training via load_epoch) 56 | if self.warmup and num_update < self.warmup_step: 57 | return self.warmup_lr 58 | while self.cur_step_ind <= len(self.step)-1: 59 | if num_update > self.step[self.cur_step_ind]: 60 | self.count = self.step[self.cur_step_ind] 61 | self.cur_step_ind += 1 62 | self.base_lr *= self.factor 63 | logging.info("Update[%d]: Change learning rate to %0.5e", 64 | num_update, self.base_lr) 65 | else: 66 | return self.base_lr 67 | return self.base_lr 68 | -------------------------------------------------------------------------------- /lib/utils/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | """ 9 | roidb 10 | basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 11 | extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] 12 | """ 13 | 14 | import cv2 15 | import numpy as np 16 | 17 | from bbox.bbox_regression import compute_bbox_regression_targets 18 | 19 | 20 | def prepare_roidb(imdb, roidb, cfg): 21 | """ 22 | add image path, max_classes, max_overlaps to roidb 23 | :param imdb: image database, provide path 24 | :param roidb: roidb 25 | :return: None 26 | """ 27 | print 'prepare roidb' 28 | for i in range(len(roidb)): # image_index 29 | roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) 30 | if cfg.TRAIN.ASPECT_GROUPING: 31 | size = cv2.imread(roidb[i]['image']).shape 32 | roidb[i]['height'] = size[0] 33 | roidb[i]['width'] = size[1] 34 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 35 | max_overlaps = gt_overlaps.max(axis=1) 36 | max_classes = gt_overlaps.argmax(axis=1) 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | roidb[i]['max_classes'] = max_classes 39 | 40 | # background roi => background class 41 | zero_indexes = np.where(max_overlaps == 0)[0] 42 | assert all(max_classes[zero_indexes] == 0) 43 | # foreground roi => foreground class 44 | nonzero_indexes = np.where(max_overlaps > 0)[0] 45 | assert all(max_classes[nonzero_indexes] != 0) 46 | -------------------------------------------------------------------------------- /lib/utils/save_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | 10 | 11 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 12 | """Checkpoint the model data into file. 13 | :param prefix: Prefix of model name. 14 | :param epoch: The epoch number of the model. 15 | :param arg_params: dict of str to NDArray 16 | Model parameter, dict of name to NDArray of net's weights. 17 | :param aux_params: dict of str to NDArray 18 | Model parameter, dict of name to NDArray of net's auxiliary states. 19 | :return: None 20 | prefix-epoch.params will be saved for parameters. 
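    The weights are stored under 'arg:'-prefixed keys and the auxiliary
    states under 'aux:'-prefixed keys, matching what load_checkpoint in
    load_model.py expects when it splits them back apart.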
21 | """ 22 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 23 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 24 | param_name = '%s-%04d.params' % (prefix, epoch) 25 | mx.nd.save(param_name, save_dict) 26 | -------------------------------------------------------------------------------- /lib/utils/show_boxes.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yi Li, Haocheng Zhang, Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | import cv2 10 | import random 11 | 12 | def show_boxes(im, dets, classes, scale = 1.0): 13 | plt.cla() 14 | plt.axis("off") 15 | plt.imshow(im) 16 | for cls_idx, cls_name in enumerate(classes): 17 | cls_dets = dets[cls_idx] 18 | for det in cls_dets: 19 | bbox = det[:4] * scale 20 | color = (random.random(), random.random(), random.random()) 21 | rect = plt.Rectangle((bbox[0], bbox[1]), 22 | bbox[2] - bbox[0], 23 | bbox[3] - bbox[1], fill=False, 24 | edgecolor=color, linewidth=2.5) 25 | plt.gca().add_patch(rect) 26 | 27 | if cls_dets.shape[1] == 5: 28 | score = det[-1] 29 | plt.gca().text(bbox[0], bbox[1], 30 | '{:s} {:.3f}'.format(cls_name, score), 31 | bbox=dict(facecolor=color, alpha=0.5), fontsize=9, color='white') 32 | plt.show() 33 | return im 34 | 35 | 36 | def draw_boxes(im, dets, classes, scale = 1.0): 37 | color_white = (255, 255, 255) 38 | for cls_idx, cls_name in enumerate(classes): 39 | cls_dets = dets[cls_idx] 40 | for det in cls_dets: 41 | bbox = det[:4] * scale 42 | bbox = map(int, bbox) 43 | color = (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256)) 44 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=color, thickness=3) 45 | 46 | if cls_dets.shape[1] == 5: 47 | score = det[-1] 48 | cv2.putText(im, '%s %.3f' % (cls_name, score), (bbox[0], bbox[1]+10), 49 | color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=2) 50 | return im 51 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | class Symbol: 10 | def __init__(self): 11 | self.arg_shape_dict = None 12 | self.out_shape_dict = None 13 | self.aux_shape_dict = None 14 | self.sym = None 15 | 16 | @property 17 | def symbol(self): 18 | return self.sym 19 | 20 | def get_symbol(self, cfg, is_train=True): 21 | """ 22 | return a generated symbol, it also need to be assigned to self.sym 23 | """ 24 | raise NotImplementedError() 25 | 26 | def init_weights(self, cfg, arg_params, aux_params): 27 | raise NotImplementedError() 28 | 29 | def get_msra_std(self, shape): 30 | fan_in = float(shape[1]) 31 | if len(shape) > 2: 32 | fan_in *= np.prod(shape[2:]) 33 | print(np.sqrt(2 / fan_in)) 34 | return np.sqrt(2 / fan_in) 35 | 36 | def infer_shape(self, data_shape_dict): 37 | # infer shape 38 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 39 | 
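        # cache name -> shape mappings for arguments, outputs and auxiliary
        # states; check_parameter_shapes below compares loaded parameters
        # against these inferred shapes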
self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 40 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 41 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 42 | 43 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 44 | for k in self.sym.list_arguments(): 45 | if k in data_shape_dict or (False if is_train else 'label' in k): 46 | continue 47 | assert k in arg_params, k + ' not initialized' 48 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 49 | 'shape inconsistent for ' + k + ' inferred ' + str(self.arg_shape_dict[k]) + ' provided ' + str( 50 | arg_params[k].shape) 51 | for k in self.sym.list_auxiliary_states(): 52 | assert k in aux_params, k + ' not initialized' 53 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 54 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 55 | aux_params[k].shape) 56 | -------------------------------------------------------------------------------- /lib/utils/tictoc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | def tic(): 11 | import time 12 | global startTime_for_tictoc 13 | startTime_for_tictoc = time.time() 14 | return startTime_for_tictoc 15 | 16 | def toc(): 17 | if 'startTime_for_tictoc' in globals(): 18 | endTime = time.time() 19 | return endTime - startTime_for_tictoc 20 | else: 21 | return None 22 | -------------------------------------------------------------------------------- /rfcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/rfcn/__init__.py -------------------------------------------------------------------------------- /rfcn/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import os.path as osp 9 | import sys 10 | 11 | def add_path(path): 12 | if path not in sys.path: 13 | sys.path.insert(0, path) 14 | 15 | this_dir = osp.dirname(__file__) 16 | 17 | lib_path = osp.join(this_dir, '..', 'lib') 18 | add_path(lib_path) 19 | -------------------------------------------------------------------------------- /rfcn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/rfcn/config/__init__.py -------------------------------------------------------------------------------- /rfcn/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Xizhou Zhu, Yuwen Xiong, Bin Xiao 6 | # 
-------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import yaml 15 | import numpy as np 16 | from easydict import EasyDict as edict 17 | 18 | config = edict() 19 | 20 | config.MXNET_VERSION = '' 21 | config.output_path = '' 22 | config.symbol = '' 23 | config.gpus = '' 24 | config.CLASS_AGNOSTIC = True 25 | config.SCALES = [(600, 1000)] # first is scale (the shorter side); second is max size 26 | 27 | # default training 28 | config.default = edict() 29 | config.default.frequent = 20 30 | config.default.kvstore = 'device' 31 | 32 | # network related params 33 | config.network = edict() 34 | config.network.pretrained = '' 35 | config.network.pretrained_epoch = 0 36 | config.network.PIXEL_MEANS = np.array([0, 0, 0]) 37 | config.network.IMAGE_STRIDE = 0 38 | config.network.RPN_FEAT_STRIDE = 16 39 | config.network.RCNN_FEAT_STRIDE = 16 40 | config.network.FIXED_PARAMS = ['gamma', 'beta'] 41 | config.network.ANCHOR_SCALES = (8, 16, 32) 42 | config.network.ANCHOR_RATIOS = (0.5, 1, 2) 43 | config.network.NORMALIZE_RPN = True 44 | config.network.ANCHOR_MEANS = (0.0, 0.0, 0.0, 0.0) 45 | config.network.ANCHOR_STDS = (0.1, 0.1, 0.4, 0.4) 46 | config.network.NUM_ANCHORS = len(config.network.ANCHOR_SCALES) * len(config.network.ANCHOR_RATIOS) 47 | 48 | # dataset related params 49 | config.dataset = edict() 50 | config.dataset.dataset = 'ImageNetVID' 51 | config.dataset.image_set = 'DET_train_30classes+VID_train_15frames' 52 | config.dataset.test_image_set = 'VID_val_frames' 53 | config.dataset.root_path = './data' 54 | config.dataset.dataset_path = './data/ILSVRC2015' 55 | config.dataset.NUM_CLASSES = 31 56 | 57 | 58 | config.TRAIN = edict() 59 | 60 | config.TRAIN.lr = 0 61 | config.TRAIN.lr_step = '' 62 | config.TRAIN.lr_factor = 0.1 63 | config.TRAIN.warmup = False 64 | config.TRAIN.warmup_lr = 0 65 | config.TRAIN.warmup_step = 0 66 | config.TRAIN.momentum = 0.9 67 | config.TRAIN.wd = 0.0005 68 | config.TRAIN.begin_epoch = 0 69 | config.TRAIN.end_epoch = 0 70 | config.TRAIN.model_prefix = '' 71 | 72 | # whether resume training 73 | config.TRAIN.RESUME = False 74 | # whether flip image 75 | config.TRAIN.FLIP = True 76 | # whether shuffle image 77 | config.TRAIN.SHUFFLE = True 78 | # whether use OHEM 79 | config.TRAIN.ENABLE_OHEM = False 80 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 81 | config.TRAIN.BATCH_IMAGES = 2 82 | # e2e changes behavior of anchor loader and metric 83 | config.TRAIN.END2END = False 84 | # group images with similar aspect ratio 85 | config.TRAIN.ASPECT_GROUPING = True 86 | 87 | # R-CNN 88 | # rcnn rois batch size 89 | config.TRAIN.BATCH_ROIS = 128 90 | config.TRAIN.BATCH_ROIS_OHEM = 128 91 | # rcnn rois sampling params 92 | config.TRAIN.FG_FRACTION = 0.25 93 | config.TRAIN.FG_THRESH = 0.5 94 | config.TRAIN.BG_THRESH_HI = 0.5 95 | config.TRAIN.BG_THRESH_LO = 0.0 96 | # rcnn bounding box regression params 97 | config.TRAIN.BBOX_REGRESSION_THRESH = 0.5 98 | config.TRAIN.BBOX_WEIGHTS = np.array([1.0, 1.0, 1.0, 1.0]) 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | config.TRAIN.RPN_BATCH_SIZE = 256 103 | # rpn anchors sampling params 104 | config.TRAIN.RPN_FG_FRACTION = 0.5 105 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 106 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 107 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 
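# when RPN_CLOBBER_POSITIVES is True, anchors that satisfy both the positive
# and the negative overlap condition end up labelled negative (the convention
# inherited from py-faster-rcnn/MX-RCNN)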
108 | # rpn bounding box regression params 109 | config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 110 | config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 111 | 112 | # used for end2end training 113 | # RPN proposal 114 | config.TRAIN.CXX_PROPOSAL = True 115 | config.TRAIN.RPN_NMS_THRESH = 0.7 116 | config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 117 | config.TRAIN.RPN_POST_NMS_TOP_N = 2000 118 | config.TRAIN.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 119 | # approximate bounding box regression 120 | config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True 121 | config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 122 | config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 123 | 124 | config.TEST = edict() 125 | 126 | # R-CNN testing 127 | # use rpn to generate proposal 128 | config.TEST.HAS_RPN = False 129 | # size of images for each device 130 | config.TEST.BATCH_IMAGES = 1 131 | 132 | # RPN proposal 133 | config.TEST.CXX_PROPOSAL = True 134 | config.TEST.RPN_NMS_THRESH = 0.7 135 | config.TEST.RPN_PRE_NMS_TOP_N = 6000 136 | config.TEST.RPN_POST_NMS_TOP_N = 300 137 | config.TEST.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 138 | 139 | # RCNN nms 140 | config.TEST.NMS = 0.3 141 | 142 | config.TEST.max_per_image = 300 143 | 144 | # Test Model Epoch 145 | config.TEST.test_epoch = 0 146 | 147 | 148 | def update_config(config_file): 149 | exp_config = None 150 | with open(config_file) as f: 151 | exp_config = edict(yaml.load(f)) 152 | for k, v in exp_config.items(): 153 | if k in config: 154 | if isinstance(v, dict): 155 | if k == 'TRAIN': 156 | if 'BBOX_WEIGHTS' in v: 157 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 158 | elif k == 'network': 159 | if 'PIXEL_MEANS' in v: 160 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 161 | for vk, vv in v.items(): 162 | config[k][vk] = vv 163 | else: 164 | if k == 'SCALES': 165 | config[k][0] = (tuple(v)) 166 | else: 167 | config[k] = v 168 | else: 169 | raise ValueError("key must exist in config.py") 170 | -------------------------------------------------------------------------------- /rfcn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/rfcn/core/__init__.py -------------------------------------------------------------------------------- /rfcn/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import time 15 | import logging 16 | import mxnet as mx 17 | 18 | 19 | class Speedometer(object): 20 | def __init__(self, batch_size, frequent=50): 21 | self.batch_size = batch_size 22 | self.frequent = frequent 23 | self.init = False 24 | self.tic = 0 25 | self.last_count = 0 26 | 27 | def __call__(self, param): 28 | """Callback to Show speed.""" 29 | count = param.nbatch 30 | if self.last_count > count: 31 | self.init = False 32 | self.last_count = count 33 | 34 | if self.init: 35 | if count % self.frequent == 0: 36 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 37 | s 
= '' 38 | if param.eval_metric is not None: 39 | name, value = param.eval_metric.get() 40 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 41 | for n, v in zip(name, value): 42 | s += "%s=%f,\t" % (n, v) 43 | else: 44 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 45 | 46 | logging.info(s) 47 | print(s) 48 | self.tic = time.time() 49 | else: 50 | self.init = True 51 | self.tic = time.time() 52 | 53 | 54 | def do_checkpoint(prefix, means, stds): 55 | def _callback(iter_no, sym, arg, aux): 56 | weight = arg['rfcn_bbox_weight'] 57 | bias = arg['rfcn_bbox_bias'] 58 | repeat = bias.shape[0] / means.shape[0] 59 | 60 | arg['rfcn_bbox_weight_test'] = weight * mx.nd.repeat(mx.nd.array(stds), repeats=repeat).reshape((bias.shape[0], 1, 1, 1)) 61 | arg['rfcn_bbox_bias_test'] = arg['rfcn_bbox_bias'] * mx.nd.repeat(mx.nd.array(stds), repeats=repeat) + mx.nd.repeat(mx.nd.array(means), repeats=repeat) 62 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 63 | arg.pop('rfcn_bbox_weight_test') 64 | arg.pop('rfcn_bbox_bias_test') 65 | return _callback -------------------------------------------------------------------------------- /rfcn/demo.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Xizhou Zhu, Yi Li, Haochen Zhang 6 | # -------------------------------------------------------- 7 | 8 | import _init_paths 9 | 10 | import argparse 11 | import os 12 | import glob 13 | import sys 14 | import logging 15 | import pprint 16 | import cv2 17 | from config.config import config, update_config 18 | from utils.image import resize, transform 19 | import numpy as np 20 | # get config 21 | os.environ['PYTHONUNBUFFERED'] = '1' 22 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 23 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 24 | cur_path = os.path.abspath(os.path.dirname(__file__)) 25 | update_config(cur_path + '/../experiments/rfcn/cfgs/rfcn_vid_demo.yaml') 26 | 27 | sys.path.insert(0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION)) 28 | import mxnet as mx 29 | from core.tester import im_detect, Predictor 30 | from symbols import * 31 | from utils.load_model import load_param 32 | from utils.show_boxes import show_boxes, draw_boxes 33 | from utils.tictoc import tic, toc 34 | from nms.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper 35 | 36 | def parse_args(): 37 | parser = argparse.ArgumentParser(description='Show Deep Feature Flow demo') 38 | args = parser.parse_args() 39 | return args 40 | 41 | args = parse_args() 42 | 43 | def main(): 44 | # get symbol 45 | pprint.pprint(config) 46 | config.symbol = 'resnet_v1_101_rfcn' 47 | model = '/../model/rfcn_vid' 48 | sym_instance = eval(config.symbol + '.' 
+ config.symbol)()
49 |     sym = sym_instance.get_test_symbol(config)
50 | 
51 |     # set up class names
52 |     num_classes = 31
53 |     classes = ['airplane', 'antelope', 'bear', 'bicycle',
54 |                'bird', 'bus', 'car', 'cattle',
55 |                'dog', 'domestic_cat', 'elephant', 'fox',
56 |                'giant_panda', 'hamster', 'horse', 'lion',
57 |                'lizard', 'monkey', 'motorcycle', 'rabbit',
58 |                'red_panda', 'sheep', 'snake', 'squirrel',
59 |                'tiger', 'train', 'turtle', 'watercraft',
60 |                'whale', 'zebra']
61 | 
62 |     # load demo data
63 |     image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
64 |     output_dir = cur_path + '/../demo/rfcn/'
65 |     if not os.path.exists(output_dir):
66 |         os.makedirs(output_dir)
67 | 
68 |     #
69 | 
70 |     data = []
71 |     for im_name in image_names:
72 |         assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
73 |         im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
74 |         target_size = config.SCALES[0][0]
75 |         max_size = config.SCALES[0][1]
76 |         im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
77 |         im_tensor = transform(im, config.network.PIXEL_MEANS)
78 |         im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
79 |         data.append({'data': im_tensor, 'im_info': im_info})
80 | 
81 | 
82 |     # get predictor
83 |     data_names = ['data', 'im_info']
84 |     label_names = []
85 |     data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
86 |     max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
87 |     provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
88 |     provide_label = [None for i in xrange(len(data))]
89 |     arg_params, aux_params = load_param(cur_path + model, 0, process=True)
90 |     predictor = Predictor(sym, data_names, label_names,
91 |                           context=[mx.gpu(0)], max_data_shapes=max_data_shape,
92 |                           provide_data=provide_data, provide_label=provide_label,
93 |                           arg_params=arg_params, aux_params=aux_params)
94 |     nms = gpu_nms_wrapper(config.TEST.NMS, 0)
95 | 
96 |     # warm up
97 |     for j in xrange(2):
98 |         data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
99 |                                      provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
100 |                                      provide_label=[None])
101 |         scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
102 |         scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
103 | 
104 |     # test
105 |     time = 0
106 |     count = 0
107 |     for idx, im_name in enumerate(image_names):
108 |         data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
109 |                                      provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
110 |                                      provide_label=[None])
111 |         scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
112 | 
113 |         tic()
114 |         scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
115 |         time += toc()
116 |         count += 1
117 |         print 'testing {} {:.4f}s'.format(im_name, time/count)
118 | 
119 |         boxes = boxes[0].astype('f')
120 |         scores = scores[0].astype('f')
121 |         dets_nms = []
122 |         for j in range(1, scores.shape[1]):
123 |             cls_scores = scores[:, j, np.newaxis]
124 |             cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
125 |             cls_dets = np.hstack((cls_boxes, cls_scores))
126 |             keep = nms(cls_dets)
127 |             cls_dets = cls_dets[keep, :]
128 |             cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
129 | 
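            # keep only confident detections (score > 0.7) for the demo
            # visualization; dets_nms[j - 1] then holds the surviving
            # [x1, y1, x2, y2, score] rows for class j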
dets_nms.append(cls_dets) 130 | 131 | # visualize 132 | im = cv2.imread(im_name) 133 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 134 | # show_boxes(im, dets_nms, classes, 1) 135 | out_im = draw_boxes(im, dets_nms, classes, 1) 136 | _, filename = os.path.split(im_name) 137 | cv2.imwrite(output_dir + filename,out_im) 138 | 139 | print 'done' 140 | 141 | if __name__ == '__main__': 142 | main() 143 | -------------------------------------------------------------------------------- /rfcn/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/rfcn/function/__init__.py -------------------------------------------------------------------------------- /rfcn/function/test_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import pprint 16 | import logging 17 | import time 18 | import os 19 | import mxnet as mx 20 | 21 | from symbols import * 22 | from dataset import * 23 | from core.loader import TestLoader 24 | from core.tester import Predictor, pred_eval 25 | from utils.load_model import load_param 26 | 27 | 28 | def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, 29 | ctx, prefix, epoch, 30 | vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None): 31 | if not logger: 32 | assert False, 'require a logger' 33 | 34 | # print cfg 35 | pprint.pprint(cfg) 36 | logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) 37 | 38 | # load symbol and testing data 39 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)() 40 | sym = sym_instance.get_test_symbol(cfg) 41 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 42 | roidb = imdb.gt_roidb() 43 | 44 | # get test data iter 45 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn) 46 | 47 | # load model 48 | arg_params, aux_params = load_param(prefix, epoch, process=True) 49 | 50 | # infer shape 51 | data_shape_dict = dict(test_data.provide_data_single) 52 | sym_instance.infer_shape(data_shape_dict) 53 | 54 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 55 | 56 | # decide maximum shape 57 | data_names = [k[0] for k in test_data.provide_data_single] 58 | label_names = None 59 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 60 | if not has_rpn: 61 | max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 62 | 63 | # create predictor 64 | predictor = Predictor(sym, data_names, label_names, 65 | context=ctx, max_data_shapes=max_data_shape, 66 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 67 | arg_params=arg_params, aux_params=aux_params) 68 | 69 | # start detection 70 | pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) 71 | 72 | -------------------------------------------------------------------------------- /rfcn/function/test_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import pprint 16 | import logging 17 | import mxnet as mx 18 | 19 | from symbols import * 20 | from dataset import * 21 | from core.loader import TestLoader 22 | from core.tester import Predictor, generate_proposals 23 | from utils.load_model import load_param 24 | 25 | 26 | def test_rpn(cfg, dataset, image_set, root_path, dataset_path, 27 | ctx, prefix, epoch, 28 | vis, shuffle, thresh, logger=None, output_path=None): 29 | # set up logger 30 | if not logger: 31 | logging.basicConfig() 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | 35 | # rpn generate proposal cfg 36 | cfg.TEST.HAS_RPN = True 37 | 38 | # print cfg 39 | pprint.pprint(cfg) 40 | logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg))) 41 | 42 | # load symbol 43 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)()
44 |     sym = sym_instance.get_symbol_rpn(cfg, is_train=False)
45 | 
46 |     # load dataset and prepare imdb for training
47 |     imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
48 |     roidb = imdb.gt_roidb()
49 |     test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True)
50 | 
51 |     # load model
52 |     arg_params, aux_params = load_param(prefix, epoch)
53 | 
54 |     # infer shape
55 |     data_shape_dict = dict(test_data.provide_data_single)
56 |     sym_instance.infer_shape(data_shape_dict)
57 | 
58 |     # check parameters
59 |     sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
60 | 
61 |     # decide maximum shape
62 |     data_names = [k[0] for k in test_data.provide_data[0]]
63 |     label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]]
64 |     max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
65 | 
66 |     # create predictor
67 |     predictor = Predictor(sym, data_names, label_names,
68 |                           context=ctx, max_data_shapes=max_data_shape,
69 |                           provide_data=test_data.provide_data, provide_label=test_data.provide_label,
70 |                           arg_params=arg_params, aux_params=aux_params)
71 | 
72 |     # start testing
73 |     imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh)
74 | 
75 |     all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
76 |     logger.info(all_log_info)
77 | 
-------------------------------------------------------------------------------- /rfcn/operator_cxx/multi_proposal.cc: --------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2017 Microsoft
3 |  * Licensed under The MIT License [see LICENSE for details]
4 |  * \file multi_proposal.cc
5 |  * \brief
6 |  * \author Xizhou Zhu
7 | */
8 | 
9 | #include "./multi_proposal-inl.h"
10 | 
11 | 
12 | namespace mxnet {
13 | namespace op {
14 | 
15 | template<typename xpu>
16 | class MultiProposalOp : public Operator{
17 |  public:
18 |   explicit MultiProposalOp(MultiProposalParam param) {
19 |     this->param_ = param;
20 |   }
21 | 
22 |   virtual void Forward(const OpContext &ctx,
23 |                        const std::vector<TBlob> &in_data,
24 |                        const std::vector<OpReqType> &req,
25 |                        const std::vector<TBlob> &out_data,
26 |                        const std::vector<TBlob> &aux_states) {
27 |     LOG(FATAL) << "not implemented";
28 |   }
29 | 
30 |   virtual void Backward(const OpContext &ctx,
31 |                         const std::vector<TBlob> &out_grad,
32 |                         const std::vector<TBlob> &in_data,
33 |                         const std::vector<TBlob> &out_data,
34 |                         const std::vector<OpReqType> &req,
35 |                         const std::vector<TBlob> &in_grad,
36 |                         const std::vector<TBlob> &aux_states) {
37 |     LOG(FATAL) << "not implemented";
38 |   }
39 | 
40 |  private:
41 |   MultiProposalParam param_;
42 | };  // class MultiProposalOp
43 | 
44 | template<>
45 | Operator *CreateOp<cpu>(MultiProposalParam param) {
46 |   return new MultiProposalOp<cpu>(param);
47 | }
48 | 
49 | Operator* MultiProposalProp::CreateOperator(Context ctx) const {
50 |   DO_BIND_DISPATCH(CreateOp, param_);
51 | }
52 | 
53 | DMLC_REGISTER_PARAMETER(MultiProposalParam);
54 | 
55 | MXNET_REGISTER_OP_PROPERTY(_contrib_MultiProposal, MultiProposalProp)
56 | .describe("Generate region proposals via RPN")
57 | .add_argument("cls_score", "NDArray-or-Symbol", "Score of how likely proposal is object.")
58 | .add_argument("bbox_pred", "NDArray-or-Symbol", "BBox Predicted deltas from anchors for proposals")
59 | .add_argument("im_info", "NDArray-or-Symbol", "Image size and scale.")
60 | .add_arguments(MultiProposalParam::__FIELDS__());
61 | 
62 | }  // namespace op
63 | }  //
namespace mxnet
64 | 
-------------------------------------------------------------------------------- /rfcn/operator_cxx/psroi_pooling.cc: --------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2017 by Contributors
3 |  * Copyright (c) 2017 Microsoft
4 |  * Licensed under The MIT License [see LICENSE for details]
5 |  * \file psroi_pooling.cc
6 |  * \brief psroi pooling operator
7 |  * \author Yi Li, Tairui Chen, Guodong Zhang, Jifeng Dai
8 | */
9 | #include "./psroi_pooling-inl.h"
10 | #include <mshadow/base.h>
11 | #include <mshadow/tensor.h>
12 | #include <mshadow/packet-inl.h>
13 | #include <mshadow/dot_engine-inl.h>
14 | #include <cassert>
15 | 
16 | using std::max;
17 | using std::min;
18 | using std::floor;
19 | using std::ceil;
20 | 
21 | namespace mshadow {
22 | template<typename Dtype>
23 | inline void PSROIPoolForward(const Tensor<cpu, 4, Dtype> &out,
24 |                              const Tensor<cpu, 4, Dtype> &data,
25 |                              const Tensor<cpu, 2, Dtype> &bbox,
26 |                              const Tensor<cpu, 4, Dtype> &mapping_channel,
27 |                              const float spatial_scale_,
28 |                              const int output_dim_,
29 |                              const int group_size_) {
30 |   // NOT_IMPLEMENTED;
31 |   return;
32 | }
33 | 
34 | template<typename Dtype>
35 | inline void PSROIPoolBackwardAcc(const Tensor<cpu, 4, Dtype> &in_grad,
36 |                                  const Tensor<cpu, 4, Dtype> &out_grad,
37 |                                  const Tensor<cpu, 2, Dtype> &bbox,
38 |                                  const Tensor<cpu, 4, Dtype> &mapping_channel,
39 |                                  const float spatial_scale_,
40 |                                  const int output_dim_) {
41 |   // NOT_IMPLEMENTED;
42 |   return;
43 | }
44 | }  // namespace mshadow
45 | 
46 | namespace mxnet {
47 | namespace op {
48 | 
49 | template<>
50 | Operator *CreateOp<cpu>(PSROIPoolingParam param, int dtype) {
51 |   Operator* op = NULL;
52 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
53 |     op = new PSROIPoolingOp<cpu, DType>(param);
54 |   });
55 |   return op;
56 | }
57 | 
58 | Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
59 |                                              std::vector<int> *in_type) const {
60 |   std::vector<TShape> out_shape, aux_shape;
61 |   std::vector<int> out_type, aux_type;
62 |   CHECK(InferType(in_type, &out_type, &aux_type));
63 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
64 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
65 | }
66 | 
67 | DMLC_REGISTER_PARAMETER(PSROIPoolingParam);
68 | 
69 | MXNET_REGISTER_OP_PROPERTY(_contrib_PSROIPooling, PSROIPoolingProp)
70 | .describe("Performs region-of-interest pooling on inputs. Resize bounding box coordinates by "
71 | "spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled "
72 | "by max pooling to a fixed size output indicated by pooled_size. batch_size will change to "
73 | "the number of region bounding boxes after PSROIPooling")
74 | .add_argument("data", "Symbol", "Input data to the pooling operator, a 4D Feature maps")
75 | .add_argument("rois", "Symbol", "Bounding box coordinates, a 2D array of "
76 | "[[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are top left and down right corners "
77 | "of designated region of interest.
batch_index indicates the index of corresponding image " 78 | "in the input data") 79 | .add_arguments(PSROIPoolingParam::__FIELDS__()); 80 | } // namespace op 81 | } // namespace mxnet -------------------------------------------------------------------------------- /rfcn/operator_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Deep-Feature-Flow/297293cbe728f817b62c82d3abfbd226300086ef/rfcn/operator_py/__init__.py -------------------------------------------------------------------------------- /rfcn/operator_py/box_annotator_ohem.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | """ 9 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 10 | """ 11 | 12 | import mxnet as mx 13 | import numpy as np 14 | from distutils.util import strtobool 15 | 16 | 17 | 18 | 19 | class BoxAnnotatorOHEMOperator(mx.operator.CustomOp): 20 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 21 | super(BoxAnnotatorOHEMOperator, self).__init__() 22 | self._num_classes = num_classes 23 | self._num_reg_classes = num_reg_classes 24 | self._roi_per_img = roi_per_img 25 | 26 | def forward(self, is_train, req, in_data, out_data, aux): 27 | 28 | cls_score = in_data[0] 29 | bbox_pred = in_data[1] 30 | labels = in_data[2].asnumpy() 31 | bbox_targets = in_data[3] 32 | bbox_weights = in_data[4] 33 | 34 | per_roi_loss_cls = mx.nd.SoftmaxActivation(cls_score) + 1e-14 35 | per_roi_loss_cls = per_roi_loss_cls.asnumpy() 36 | per_roi_loss_cls = per_roi_loss_cls[np.arange(per_roi_loss_cls.shape[0], dtype='int'), labels.astype('int')] 37 | per_roi_loss_cls = -1 * np.log(per_roi_loss_cls) 38 | per_roi_loss_cls = np.reshape(per_roi_loss_cls, newshape=(-1,)) 39 | 40 | per_roi_loss_bbox = bbox_weights * mx.nd.smooth_l1((bbox_pred - bbox_targets), scalar=1.0) 41 | per_roi_loss_bbox = mx.nd.sum(per_roi_loss_bbox, axis=1).asnumpy() 42 | 43 | top_k_per_roi_loss = np.argsort(per_roi_loss_cls + per_roi_loss_bbox) 44 | labels_ohem = labels 45 | labels_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = -1 46 | bbox_weights_ohem = bbox_weights.asnumpy() 47 | bbox_weights_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = 0 48 | 49 | labels_ohem = mx.nd.array(labels_ohem) 50 | bbox_weights_ohem = mx.nd.array(bbox_weights_ohem) 51 | 52 | for ind, val in enumerate([labels_ohem, bbox_weights_ohem]): 53 | self.assign(out_data[ind], req[ind], val) 54 | 55 | 56 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 57 | for i in range(len(in_grad)): 58 | self.assign(in_grad[i], req[i], 0) 59 | 60 | 61 | @mx.operator.register('BoxAnnotatorOHEM') 62 | class BoxAnnotatorOHEMProp(mx.operator.CustomOpProp): 63 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 64 | super(BoxAnnotatorOHEMProp, self).__init__(need_top_grad=False) 65 | self._num_classes = int(num_classes) 66 | self._num_reg_classes = int(num_reg_classes) 67 | self._roi_per_img = int(roi_per_img) 68 | 69 | def list_arguments(self): 70 | return ['cls_score', 'bbox_pred', 'labels', 'bbox_targets', 'bbox_weights'] 71 | 72 | def list_outputs(self): 73 | return ['labels_ohem', 
'bbox_weights_ohem']
74 | 
75 |     def infer_shape(self, in_shape):
76 |         labels_shape = in_shape[2]
77 |         bbox_weights_shape = in_shape[4]
78 | 
79 |         return in_shape, \
80 |                [labels_shape, bbox_weights_shape]
81 | 
82 |     def create_operator(self, ctx, shapes, dtypes):
83 |         return BoxAnnotatorOHEMOperator(self._num_classes, self._num_reg_classes, self._roi_per_img)
84 | 
85 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
86 |         return []
87 | 
-------------------------------------------------------------------------------- /rfcn/operator_py/proposal_target.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Deep Feature Flow
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified by Yuwen Xiong
6 | # --------------------------------------------------------
7 | # Based on:
8 | # MX-RCNN
9 | # Copyright (c) 2016 by Contributors
10 | # Licence under The Apache 2.0 License
11 | # https://github.com/ijkguo/mx-rcnn/
12 | # --------------------------------------------------------
13 | 
14 | """
15 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them.
16 | """
17 | 
18 | import mxnet as mx
19 | import numpy as np
20 | from distutils.util import strtobool
21 | from easydict import EasyDict as edict
22 | import cPickle
23 | 
24 | 
25 | from core.rcnn import sample_rois
26 | 
27 | DEBUG = False
28 | 
29 | 
30 | class ProposalTargetOperator(mx.operator.CustomOp):
31 |     def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction):
32 |         super(ProposalTargetOperator, self).__init__()
33 |         self._num_classes = num_classes
34 |         self._batch_images = batch_images
35 |         self._batch_rois = batch_rois
36 |         self._cfg = cfg
37 |         self._fg_fraction = fg_fraction
38 | 
39 |         if DEBUG:
40 |             self._count = 0
41 |             self._fg_num = 0
42 |             self._bg_num = 0
43 | 
44 |     def forward(self, is_train, req, in_data, out_data, aux):
45 |         assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \
46 |             'batch_images {} must divide batch_rois {}'.format(self._batch_images, self._batch_rois)
47 |         all_rois = in_data[0].asnumpy()
48 |         gt_boxes = in_data[1].asnumpy()
49 | 
50 |         if self._batch_rois == -1:
51 |             rois_per_image = all_rois.shape[0] + gt_boxes.shape[0]
52 |             fg_rois_per_image = rois_per_image
53 |         else:
54 |             rois_per_image = self._batch_rois / self._batch_images
55 |             fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int)
56 | 
57 | 
58 |         # Include ground-truth boxes in the set of candidate rois
59 |         zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
60 |         all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
61 |         # Sanity check: single batch only
62 |         assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported'
63 | 
64 |         rois, labels, bbox_targets, bbox_weights = \
65 |             sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes)
66 | 
67 |         if DEBUG:
68 |             print "labels=", labels
69 |             print 'num fg: {}'.format((labels > 0).sum())
70 |             print 'num bg: {}'.format((labels == 0).sum())
71 |             self._count += 1
72 |             self._fg_num += (labels > 0).sum()
73 |             self._bg_num += (labels == 0).sum()
74 |             print "self._count=", self._count
75 |             print 'num fg avg: {}'.format(self._fg_num / self._count)
76 |             print 'num bg avg: {}'.format(self._bg_num / self._count)
77 |             print 'ratio:
{:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 78 | 79 | for ind, val in enumerate([rois, labels, bbox_targets, bbox_weights]): 80 | self.assign(out_data[ind], req[ind], val) 81 | 82 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 83 | self.assign(in_grad[0], req[0], 0) 84 | self.assign(in_grad[1], req[1], 0) 85 | 86 | 87 | @mx.operator.register('proposal_target') 88 | class ProposalTargetProp(mx.operator.CustomOpProp): 89 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction='0.25'): 90 | super(ProposalTargetProp, self).__init__(need_top_grad=False) 91 | self._num_classes = int(num_classes) 92 | self._batch_images = int(batch_images) 93 | self._batch_rois = int(batch_rois) 94 | self._cfg = cPickle.loads(cfg) 95 | self._fg_fraction = float(fg_fraction) 96 | 97 | def list_arguments(self): 98 | return ['rois', 'gt_boxes'] 99 | 100 | def list_outputs(self): 101 | return ['rois_output', 'label', 'bbox_target', 'bbox_weight'] 102 | 103 | def infer_shape(self, in_shape): 104 | rpn_rois_shape = in_shape[0] 105 | gt_boxes_shape = in_shape[1] 106 | 107 | rois = rpn_rois_shape[0] + gt_boxes_shape[0] if self._batch_rois == -1 else self._batch_rois 108 | 109 | output_rois_shape = (rois, 5) 110 | label_shape = (rois, ) 111 | bbox_target_shape = (rois, self._num_classes * 4) 112 | bbox_weight_shape = (rois, self._num_classes * 4) 113 | 114 | return [rpn_rois_shape, gt_boxes_shape], \ 115 | [output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape] 116 | 117 | def create_operator(self, ctx, shapes, dtypes): 118 | return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._cfg, self._fg_fraction) 119 | 120 | def declare_backward_dependency(self, out_grad, in_data, out_data): 121 | return [] 122 | -------------------------------------------------------------------------------- /rfcn/operator_py/rpn_inv_normalize.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | import numpy as np 10 | from distutils.util import strtobool 11 | 12 | class RPNInvNormalizeOperator(mx.operator.CustomOp): 13 | def __init__(self, num_anchors, bbox_mean, bbox_std): 14 | super(RPNInvNormalizeOperator, self).__init__() 15 | self._num_anchors = num_anchors 16 | self._bbox_mean = mx.ndarray.Reshape(mx.nd.array(bbox_mean), shape=(1,4,1,1)) 17 | self._bbox_std = mx.ndarray.Reshape(mx.nd.array(bbox_std), shape=(1,4,1,1)) 18 | 19 | def forward(self, is_train, req, in_data, out_data, aux): 20 | bbox_pred = in_data[0] 21 | tile_shape = (bbox_pred.shape[0], self._num_anchors, bbox_pred.shape[2], bbox_pred.shape[3]) 22 | bbox_mean = mx.ndarray.tile(self._bbox_mean.as_in_context(bbox_pred.context), reps=tile_shape) 23 | bbox_std = mx.ndarray.tile(self._bbox_std.as_in_context(bbox_pred.context), reps=tile_shape) 24 | bbox_pred = bbox_pred * bbox_std + bbox_mean 25 | 26 | self.assign(out_data[0], req[0], bbox_pred) 27 | 28 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 29 | self.assign(in_grad[0], req[0], 0) 30 | 31 | @mx.operator.register('rpn_inv_normalize') 32 | class RPNInvNormalizeProp(mx.operator.CustomOpProp): 33 | def __init__(self, num_anchors, bbox_mean='(0.0, 0.0, 0.0, 0.0)', 
bbox_std='(0.1, 0.1, 0.2, 0.2)'):
34 |         super(RPNInvNormalizeProp, self).__init__(need_top_grad=False)
35 |         self._num_anchors = int(num_anchors)
36 |         self._bbox_mean = np.fromstring(bbox_mean[1:-1], dtype=float, sep=',')
37 |         self._bbox_std = np.fromstring(bbox_std[1:-1], dtype=float, sep=',')
38 | 
39 |     def list_arguments(self):
40 |         return ['bbox_pred']
41 | 
42 |     def list_outputs(self):
43 |         return ['out_bbox_pred']
44 | 
45 |     def infer_shape(self, in_shape):
46 | 
47 |         return [in_shape[0]], \
48 |                [in_shape[0]]
49 | 
50 |     def create_operator(self, ctx, shapes, dtypes):
51 |         return RPNInvNormalizeOperator(self._num_anchors, self._bbox_mean, self._bbox_std)
52 | 
53 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
54 |         return []
55 | 
-------------------------------------------------------------------------------- /rfcn/symbols/__init__.py: --------------------------------------------------------------------------------
1 | import resnet_v1_101_rfcn
2 | 
-------------------------------------------------------------------------------- /rfcn/test.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Deep Feature Flow
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified by Yuwen Xiong
6 | # --------------------------------------------------------
7 | # Based on:
8 | # MX-RCNN
9 | # Copyright (c) 2016 by Contributors
10 | # Licence under The Apache 2.0 License
11 | # https://github.com/ijkguo/mx-rcnn/
12 | # --------------------------------------------------------
13 | 
14 | import _init_paths
15 | 
16 | import cv2
17 | import argparse
18 | import os
19 | import sys
20 | import time
21 | import logging
22 | from config.config import config, update_config
23 | 
24 | def parse_args():
25 |     parser = argparse.ArgumentParser(description='Test a R-FCN network')
26 |     # general
27 |     parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str)
28 | 
29 |     args, rest = parser.parse_known_args()
30 |     update_config(args.cfg)
31 | 
32 |     # rcnn
33 |     parser.add_argument('--vis', help='turn on visualization', action='store_true')
34 |     parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true')
35 |     parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float)
36 |     parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true')
37 |     args = parser.parse_args()
38 |     return args
39 | 
40 | args = parse_args()
41 | curr_path = os.path.abspath(os.path.dirname(__file__))
42 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION))
43 | 
44 | import mxnet as mx
45 | from function.test_rcnn import test_rcnn
46 | from utils.create_logger import create_logger
47 | 
48 | 
49 | def main():
50 |     ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
51 |     print args
52 | 
53 |     logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)
54 | 
55 |     test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
56 |               ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch,
57 |               args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger,
output_path=final_output_path) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | --------------------------------------------------------------------------------
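A minimal, self-contained sketch of how the preprocessing utilities in lib/utils/image.py fit together (a usage illustration, not part of the repository; it assumes the repo's lib directory is on sys.path and that the demo frame path below exists, and it follows config.network.PIXEL_MEANS = [0, 0, 0], so no mean subtraction happens by default):

import cv2
import numpy as np
from utils.image import resize, transform, tensor_vstack

im = cv2.imread('demo/ILSVRC2015_val_00007010/000000.JPEG',
                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
# scale the short side to 600 while capping the long side at 1000,
# mirroring config.SCALES = [(600, 1000)]
im, im_scale = resize(im, 600, 1000)
# BGR image -> [1, 3, H, W] RGB tensor (no mean subtraction unless need_mean)
im_tensor = transform(im, np.array([[[0, 0, 0]]]))
im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
# tensor_vstack pads tensors to a common shape and stacks them into a batch
batch = tensor_vstack([im_tensor, im_tensor])
assert batch.shape == (2,) + im_tensor.shape[1:]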