├── .gitignore ├── DeeplabCityscapes.tgn ├── DockerConfigs ├── MXNet │ ├── Dockerfile.python.gpu │ └── install │ │ ├── cpp.sh │ │ ├── python.sh │ │ └── source.list └── README.md ├── LICENSE ├── README.md ├── data └── cityscapes ├── deeplab ├── __init__.py ├── _init_paths.py ├── config │ ├── __init__.py │ ├── config.py │ └── dff_config.py ├── core │ ├── DataParallelExecutorGroup.py │ ├── __init__.py │ ├── callback.py │ ├── loader.py │ ├── metric.py │ ├── module.py │ └── tester.py ├── demo.py ├── dff_test.py ├── dff_train.py ├── function │ ├── __init__.py │ ├── reeval.py │ └── test_deeplab.py ├── symbols │ ├── __init__.py │ ├── deeplabv3 │ │ ├── aspp_temp.py │ │ ├── xception_65_deeplab_v3_plus.py │ │ └── xception_temp.py │ ├── print_summary.py │ ├── resnet_v1_101_deeplab.py │ ├── resnet_v1_101_deeplab_dcn.py │ ├── resnet_v1_101_deeplab_dcn_duc.py │ ├── resnet_v1_101_deeplab_video.py │ └── resnet_v1_101_deeplab_video_dcn.py ├── test.py └── train.py ├── experiments ├── deeplab │ ├── cfgs │ │ ├── deeplab_cityscapes_demo.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_capsule.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml │ │ ├── deeplab_resnet_v1_101_voc12_segmentation_base.yaml │ │ └── deeplab_resnet_v1_101_voc12_segmentation_dcn.yaml │ ├── deeplab_test.py │ └── deeplab_train_test.py ├── deeplab_dff │ ├── cfgs │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml │ │ └── deeplab_resnet_v1_101_cityscapes_segmentation_video_duc.yaml │ ├── deeplab_dff_test.py │ └── deeplab_dff_train.py ├── faster_rcnn │ ├── cfgs │ │ ├── resnet_v1_101_coco_trainval_rcnn_dcn_end2end.yaml │ │ ├── resnet_v1_101_coco_trainval_rcnn_end2end.yaml │ │ ├── resnet_v1_101_voc0712_rcnn_dcn_end2end.yaml │ │ └── resnet_v1_101_voc0712_rcnn_end2end.yaml │ ├── rcnn_end2end_train_test.py │ ├── rcnn_test.py │ └── rcnn_train_test.py └── rfcn │ ├── cfgs │ ├── deform_conv_demo.yaml │ ├── deform_psroi_demo.yaml │ ├── resnet_v1_101_coco_trainval_rfcn_dcn_end2end_ohem.yaml │ ├── resnet_v1_101_coco_trainval_rfcn_end2end_ohem.yaml │ ├── resnet_v1_101_voc0712_rfcn_dcn_end2end_ohem.yaml │ ├── resnet_v1_101_voc0712_rfcn_end2end_ohem.yaml │ └── rfcn_coco_demo.yaml │ ├── rfcn_alternate_train_test.py │ ├── rfcn_end2end_train_test.py │ ├── rfcn_test.py │ └── rfcn_train_test.py ├── init.sh ├── lib ├── Makefile ├── __init__.py ├── bbox │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── setup_linux.py │ └── setup_windows.py ├── dataset │ ├── __init__.py │ ├── cityscape.py │ ├── cityscape_video.py │ ├── coco.py │ ├── ds_utils.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── pascal_voc_eval.py │ └── pycocotools │ │ ├── .gitignore │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── mask.py │ │ ├── maskApi.c │ │ ├── maskApi.h │ │ ├── setup_linux.py │ │ └── setup_windows.py ├── logger │ ├── __init__.py │ ├── logger.py │ ├── readme.md │ └── visdomlogger.py ├── mask │ ├── __init__.py │ └── mask_transform.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ ├── setup_linux.py │ ├── setup_windows.py │ └── setup_windows_cuda.py ├── rpn │ ├── __init__.py │ ├── generate_anchor.py │ └── rpn.py ├── segmentation │ ├── __init__.py │ └── segmentation.py └── utils │ ├── PrefetchingIter.py │ ├── __init__.py │ ├── combine_model.py │ ├── create_logger.py 
│ ├── image.py │ ├── image_processing.py │ ├── load_data.py │ ├── load_model.py │ ├── lr_scheduler.py │ ├── mask_coco2voc.py │ ├── mask_voc2coco.py │ ├── network_visualization.py │ ├── roidb.py │ ├── save_model.py │ ├── show_boxes.py │ ├── show_masks.py │ ├── show_offset.py │ ├── symbol.py │ └── tictoc.py ├── model └── pretrained_model │ └── resnet_v1_101-0000.params └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .vscode/ 3 | model/ 4 | .mxnet_0.12/ 5 | output/ 6 | data/ 7 | 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | Deformable-origin/ 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | # lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /DockerConfigs/MXNet/Dockerfile.python.gpu: -------------------------------------------------------------------------------- 1 | # -*- mode: dockerfile -*- 2 | # dockerfile to build libmxnet.so on GPU 3 | # Use cuda 9.0: mxnet-cu90 and the xenial apt sources below require it 4 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 5 | # FROM nvidia/cuda:latest 6 | MAINTAINER Songyang Zhang 7 | 8 | COPY install/cpp.sh install/ 9 | RUN chmod +x install/cpp.sh 10 | RUN install/cpp.sh 11 | 12 | # ENV BUILD_OPTS "USE_CUDA=1 USE_CUDA_PATH=/usr/local/cuda USE_CUDNN=1" 13 | # RUN git clone --recursive https://github.com/dmlc/mxnet && cd mxnet && \ 14 | # make -j$(nproc) $BUILD_OPTS 15 | 16 | # OpenCV 17 | RUN apt-get update && \ 18 | apt-get install -y \ 19 | build-essential \ 20 | cmake \ 21 | git \ 22 | wget \ 23 | unzip \ 24 | yasm \ 25 | pkg-config \ 26 | libswscale-dev \ 27 | libtbb2 \ 28 | libtbb-dev \ 29 | libjpeg-dev \ 30 | libpng-dev \ 31 | libtiff-dev \ 32 | libjasper-dev \ 33 | libavformat-dev \ 34 | libpq-dev 35 | 36 | WORKDIR / 37 | ENV OPENCV_VERSION="3.4.1" 38 | RUN wget https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \ 39 | && unzip ${OPENCV_VERSION}.zip \ 40 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary \ 41 | && cd /opencv-${OPENCV_VERSION}/cmake_binary \ 42 | && 
cmake -DBUILD_TIFF=ON \ 43 | -DBUILD_opencv_java=OFF \ 44 | -DWITH_CUDA=OFF \ 45 | -DENABLE_AVX=ON \ 46 | -DWITH_OPENGL=ON \ 47 | -DWITH_OPENCL=ON \ 48 | -DWITH_IPP=ON \ 49 | -DWITH_TBB=ON \ 50 | -DWITH_EIGEN=ON \ 51 | -DWITH_V4L=ON \ 52 | -DBUILD_TESTS=OFF \ 53 | -DBUILD_PERF_TESTS=OFF \ 54 | -DCMAKE_BUILD_TYPE=RELEASE \ 55 | -DCMAKE_INSTALL_PREFIX=$(python2.7 -c "import sys; print(sys.prefix)") \ 56 | -DPYTHON_EXECUTABLE=$(which python2.7) \ 57 | -DPYTHON_INCLUDE_DIR=$(python2.7 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 58 | -DPYTHON_PACKAGES_PATH=$(python2.7 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") .. \ 59 | && make install -j $(nproc) \ 60 | && rm /${OPENCV_VERSION}.zip \ 61 | && rm -r /opencv-${OPENCV_VERSION} 62 | 63 | # -*- mode: dockerfile -*- 64 | # part of the dockerfile to install the python binding 65 | 66 | COPY install/python.sh install/ 67 | RUN chmod +x install/python.sh 68 | RUN install/python.sh 69 | 70 | RUN pip2 install nose numpy==1.14.0 nose-timer requests==2.18.4 Pillow easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 -i https://pypi.tuna.tsinghua.edu.cn/simple 71 | RUN pip3 install nose pylint numpy==1.14.0 nose-timer requests==2.18.4 Pillow easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 -i https://pypi.tuna.tsinghua.edu.cn/simple 72 | 73 | RUN pip2 install opencv-python==3.4.1.15 -i https://pypi.tuna.tsinghua.edu.cn/simple 74 | RUN pip3 install opencv-python==3.4.1.15 -i https://pypi.tuna.tsinghua.edu.cn/simple 75 | 76 | RUN apt-get -y install python-tk 77 | RUN apt-get -y install python3-tk 78 | 79 | ENV PYTHONPATH=/mxnet/python 80 | CMD sh -c 'ln -s /dev/null /dev/raw1394'; bash 81 |
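Once the image is built, a quick way to confirm that the Python bindings picked up the intended OpenCV 3.4.1 (a sketch, not part of the repo; relevant because of the 2.x/3.x mismatch noted in the FAQ of the top-level README):
```bash
python2 -c "import cv2; print(cv2.__version__)"   # expected: 3.4.1
python3 -c "import cv2; print(cv2.__version__)"
```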
-------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/cpp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # libraries for building mxnet c++ core on ubuntu 21 | 22 | cp source.list /etc/apt/sources.list 23 | 24 | apt-get update && apt-get install -y \ 25 | build-essential git libatlas-base-dev libopencv-dev python-opencv \ 26 | libcurl4-openssl-dev libgtest-dev cmake wget unzip 27 | 28 | cd /usr/src/gtest && cmake CMakeLists.txt && make && cp *.a /usr/lib -------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/python.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # install libraries for mxnet's python package on ubuntu 21 | 22 | apt-get update && apt-get install -y python-dev python3-dev 23 | 24 | # the version of pip shipped with ubuntu may be too old, so install a recent version here 25 | cd /tmp && wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && python2 get-pip.py 26 | 27 | pip2 install nose pylint numpy nose-timer requests Pillow 28 | pip3 install nose pylint numpy nose-timer requests Pillow 29 | 30 | # For segmentation 31 | pip2 install easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 32 | pip3 install easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 33 | 34 |
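A similar sanity check for the MXNet install performed above (a sketch; `mxnet-cu90` expects the CUDA 9.0 runtime, so run it inside the GPU container):
```bash
python2 -c "import mxnet; print(mxnet.__version__)"
python3 -c "import mxnet; print(mxnet.__version__)"
```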
-------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/source.list: -------------------------------------------------------------------------------- 1 | # deb cdrom:[Ubuntu 16.04 LTS _Xenial Xerus_ - Release amd64 (20160420.1)]/ xenial main restricted 2 | deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties 3 | deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted 4 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties 5 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted 6 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties 7 | deb http://mirrors.aliyun.com/ubuntu/ xenial universe 8 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe 9 | deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse 10 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse 11 | deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse 12 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties 13 | deb http://archive.canonical.com/ubuntu xenial partner 14 | deb-src http://archive.canonical.com/ubuntu xenial partner 15 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted 16 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties 17 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe 18 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse -------------------------------------------------------------------------------- /DockerConfigs/README.md: -------------------------------------------------------------------------------- 1 | # Build a Docker image to run Deep Feature Flow 2 | Build the image using our Dockerfile configuration: 3 | ```bash 4 | cd ./MXNet 5 | docker build -t mxnet_dff/python:gpu -f ./Dockerfile.python.gpu . 6 | ``` 7 | - `-t` sets the image tag 8 | - `-f` points to the Dockerfile 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Songyang Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Feature Flow for Video Semantic Segmentation 2 | Based on Deeplab V2 3 | 4 | ## 1. Setup environment 5 | - If you use our dockerfile, you can run the code easily (see the container example below). 6 | - If you want to set up your own env, please follow these steps: 7 | - We only support `python2.7` for now 8 | - Install tk: `sudo apt-get -y install python-tk` 9 | - Install OpenCV 3.4.1 10 | - Install the needed python packages with `pip install -r requirements.txt` 11 | - If you are in mainland China, you can use this mirror to speed up the download: 12 | `pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple` 13 | - Then run `sh init.sh` to build the lib for faster-rcnn 14 | Because we reuse code from Deformable ConvNets and the dataloader has some dependencies on faster-rcnn, you need to build the lib first.
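If you built the Docker image from `DockerConfigs`, one possible way to start a GPU container with this repo mounted (a sketch — `--runtime=nvidia` assumes nvidia-docker2 is installed, and the mount path and working directory are assumptions; adjust them to your setup):
```bash
docker run --runtime=nvidia -it \
    -v $(pwd):/workspace -w /workspace \
    mxnet_dff/python:gpu bash
```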
15 | ## 2. Prepare Data and Pretrained Model 16 | ### Cityscapes Data 17 | Download the Cityscapes data from the official webpage and unzip it. 18 | Put the data into `data/cityscapes`; you can use a soft link to set the data path, as follows: 19 | `ln -s Dataset_path ./data/cityscapes` 20 | 21 | If you want to try DFF, you also need to download the Cityscapes video data and put it into `data/cityscapes_video` 22 | 23 | ### Pretrained Model 24 | Download the pretrained ResNet model and FlowNet from [OneDrive](https://onedrive.live.com/?authkey=%21AAXQgYjWim3Iz6w&cid=F371D9563727B96F&id=F371D9563727B96F%21102798&parId=F371D9563727B96F%21102795&action=locate), and put the models into `model/pretrained_model/`: 25 | ```bash 26 | ./model/pretrained_model/resnet_v1_101-0000.params 27 | ./model/pretrained_model/flownet-0000.params 28 | ``` 29 | 30 | ## 3. Train and Test 31 | ### Training Deeplab V2 32 | `python ./experiments/deeplab/deeplab_train_test.py --cfg ./experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml` 33 | ### Training Deeplab V2 Deformable 34 | `python ./experiments/deeplab/deeplab_train_test.py --cfg ./experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml` 35 | ### Training DFF Deeplab V2 36 | `python ./experiments/deeplab_dff/deeplab_dff_train.py --cfg ./experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml` 37 | 38 | ## 4. Performance 39 | TBD 40 | ## 5. TODO List 41 | - [x] Add Scripts 42 | - [ ] Add experiment results 43 | - [ ] Add support for Deeplab V3+ 44 | - [ ] Add BiSeNet 45 | ## 6. FAQ 46 | - The program hangs if your system OpenCV is 2.x while your opencv-python is 3.x 47 | 48 | ## 7. Acknowledgement 49 | 50 | Thanks to the official Deep Feature Flow implementation and Deeplab implementation from msracver 51 | - [Deep Feature Flow](https://github.com/msracver/Deep-Feature-Flow) 52 | - [Deformable ConvNets](https://github.com/msracver/Deformable-ConvNets) 53 | -------------------------------------------------------------------------------- /data/cityscapes: -------------------------------------------------------------------------------- 1 | /home/PublicDataset/cityscapes -------------------------------------------------------------------------------- /deeplab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/__init__.py -------------------------------------------------------------------------------- /deeplab/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Zheng Zhang 7 | # -------------------------------------------------------- 8 | 9 | import os.path as osp 10 | import sys 11 | 12 | def add_path(path): 13 | if path not in sys.path: 14 | sys.path.insert(0, path) 15 | 16 | this_dir = osp.dirname(__file__) 17 | 18 | lib_path = osp.join(this_dir, '..', 'lib') 19 | add_path(lib_path) 20 | -------------------------------------------------------------------------------- /deeplab/config/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/config/__init__.py -------------------------------------------------------------------------------- /deeplab/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import yaml 12 | import numpy as np 13 | from easydict import EasyDict as edict 14 | 15 | config = edict() 16 | 17 | config.MXNET_VERSION = '' 18 | config.output_path = '' 19 | config.symbol = '' 20 | config.gpus = '' 21 | config.CLASS_AGNOSTIC = True 22 | config.SCALES = [(360, 600)] # first is scale (the shorter side); second is max size 23 | 24 | # default training 25 | config.default = edict() 26 | config.default.frequent = 1000 27 | config.default.kvstore = 'device' 28 | 29 | # network related params 30 | config.network = edict() 31 | config.network.pretrained = '../model/pretrained_model/resnet_v1-101' 32 | config.network.pretrained_epoch = 0 33 | config.network.PIXEL_MEANS = np.array([103.06, 115.90, 123.15]) 34 | config.network.IMAGE_STRIDE = 0 35 | config.network.FIXED_PARAMS = ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'] 36 | 37 | # dataset related params 38 | config.dataset = edict() 39 | config.dataset.dataset = 'cityscapes' 40 | config.dataset.image_set = 'leftImg8bit_train' 41 | config.dataset.test_image_set = 'leftImg8bit_val' 42 | config.dataset.root_path = '../data' 43 | config.dataset.dataset_path = '../data/cityscapes' 44 | config.dataset.NUM_CLASSES = 19 45 | config.dataset.annotation_prefix = 'gtFine' 46 | 47 | config.TRAIN = edict() 48 | config.TRAIN.lr = 0 49 | config.TRAIN.lr_step = '' 50 | config.TRAIN.warmup = False 51 | config.TRAIN.warmup_lr = 0 52 | config.TRAIN.warmup_step = 0 53 | config.TRAIN.momentum = 0.9 54 | config.TRAIN.wd = 0.0005 55 | config.TRAIN.begin_epoch = 0 56 | config.TRAIN.end_epoch = 0 57 | config.TRAIN.model_prefix = 'deeplab' 58 | 59 | # whether resume training 60 | config.TRAIN.RESUME = False 61 | # whether flip image 62 | config.TRAIN.FLIP = True 63 | # whether shuffle image 64 | config.TRAIN.SHUFFLE = True 65 | # whether use OHEM 66 | config.TRAIN.ENABLE_OHEM = False 67 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 68 | config.TRAIN.BATCH_IMAGES = 1 69 | 70 | config.TEST = edict() 71 | # size of images for each device 72 | config.TEST.BATCH_IMAGES = 1 73 | 74 | # Test Model Epoch 75 | config.TEST.test_epoch = 0 76 | 77 | def update_config(config_file): 78 | exp_config = None 79 | with open(config_file) as f: 80 | exp_config = edict(yaml.load(f)) 81 | for k, v in exp_config.items(): 82 | if k in config: 83 | if isinstance(v, dict): 84 | if k == 'TRAIN': 85 | if 'BBOX_WEIGHTS' in v: 86 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 87 | elif k == 'network': 88 | if 'PIXEL_MEANS' in v: 89 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 90 | for vk, vv in v.items(): 91 | config[k][vk] = vv 92 | else: 93 | if k == 'SCALES': 94 | config[k][0] = (tuple(v)) 95 | else: 96 | config[k] = v 97 | else: 98 | raise ValueError("key must exist in config.py") 99 | 
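The YAML files under `experiments/*/cfgs` override these defaults via `update_config`; here is a minimal sketch of that behavior (not part of the repo — it assumes you run it from the `deeplab/` directory so the `config` package is importable):
```python
from config.config import config, update_config

# library defaults defined above
assert config.gpus == ''
assert config.TRAIN.lr == 0

# merge an experiment file into the defaults;
# top-level keys must already exist in config.py, or ValueError is raised
update_config('../experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml')

print(config.gpus)      # '0,1,2,3', taken from the YAML
print(config.TRAIN.lr)  # 0.0005, taken from the YAML TRAIN block
```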
-------------------------------------------------------------------------------- /deeplab/config/dff_config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import yaml 12 | import numpy as np 13 | from easydict import EasyDict as edict 14 | 15 | config = edict() 16 | 17 | config.MXNET_VERSION = '' 18 | config.output_path = '' 19 | config.symbol = '' 20 | config.gpus = '' 21 | config.CLASS_AGNOSTIC = True 22 | config.SCALES = [(360, 600)] # first is scale (the shorter side); second is max size 23 | config.final_output_path = '' 24 | # default training 25 | config.default = edict() 26 | config.default.frequent = 1000 27 | config.default.kvstore = 'device' 28 | 29 | # network related params 30 | config.network = edict() 31 | config.network.pretrained = '../model/pretrained_model/flownet' 32 | config.network.pretrained_epoch = 0 33 | config.network.PIXEL_MEANS = np.array([103.06, 115.90, 123.15]) 34 | config.network.IMAGE_STRIDE = 0 35 | config.network.FIXED_PARAMS = ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'] 36 | 37 | # dataset related params 38 | config.dataset = edict() 39 | config.dataset.dataset = 'cityscapes' 40 | config.dataset.image_set = 'leftImg8bit_train' 41 | config.dataset.test_image_set = 'leftImg8bit_val' 42 | config.dataset.root_path = '../data' 43 | config.dataset.dataset_path = '../data/cityscapes' 44 | config.dataset.NUM_CLASSES = 19 45 | config.dataset.annotation_prefix = 'gtFine' 46 | config.dataset.dataset_video_path = './data/cityscapes_video/leftImg8bit_sequence' 47 | config.dataset.video_gt_path = './data/cityscapes_video/gtFine_sequence_own' # video sequence gt folder 48 | 49 | config.TRAIN = edict() 50 | config.TRAIN.lr = 0 51 | config.TRAIN.lr_step = '' 52 | config.TRAIN.warmup = False 53 | config.TRAIN.warmup_lr = 0 54 | config.TRAIN.warmup_step = 0 55 | config.TRAIN.momentum = 0.9 56 | config.TRAIN.wd = 0.0005 57 | config.TRAIN.begin_epoch = 0 58 | config.TRAIN.end_epoch = 0 59 | config.TRAIN.model_prefix = 'deeplab' 60 | 61 | # for video segmentation 62 | config.TRAIN.MIN_OFFSET = -5 63 | config.TRAIN.MAX_OFFSET = 1 64 | 65 | # whether resume training 66 | config.TRAIN.RESUME = False 67 | # whether flip image 68 | config.TRAIN.FLIP = True 69 | # whether shuffle image 70 | config.TRAIN.SHUFFLE = True 71 | # whether use OHEM 72 | config.TRAIN.ENABLE_OHEM = False 73 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 74 | config.TRAIN.BATCH_IMAGES = 1 75 | 76 | # Video mode:(for load data) 77 | config.TRAIN.VIDEO = True 78 | # for tensorboard file 79 | config.TRAIN.TB_DIR = '' 80 | 81 | config.TEST = edict() 82 | # size of images for each device 83 | config.TEST.BATCH_IMAGES = 1 84 | config.TEST.OFFSET = -1 85 | # Test Model Epoch 86 | config.TEST.test_epoch = 0 87 | 88 | def update_config(config_file): 89 | exp_config = None 90 | with open(config_file) as f: 91 | exp_config = edict(yaml.load(f)) 92 | for k, v in exp_config.items(): 93 | if k in config: 94 | if isinstance(v, dict): 95 | if k == 'TRAIN': 96 | if 'BBOX_WEIGHTS' in v: 97 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 
98 | elif k == 'network': 99 | if 'PIXEL_MEANS' in v: 100 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 101 | for vk, vv in v.items(): 102 | config[k][vk] = vv 103 | else: 104 | if k == 'SCALES': 105 | config[k][0] = (tuple(v)) 106 | else: 107 | config[k] = v 108 | else: 109 | raise ValueError("key must exist in config.py") 110 | -------------------------------------------------------------------------------- /deeplab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/core/__init__.py -------------------------------------------------------------------------------- /deeplab/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import time 12 | import logging 13 | import mxnet as mx 14 | # from lib.logger.visdomlogger import VisdomPlotLogger 15 | class Speedometer(object): 16 | def __init__(self, batch_size, frequent=50): 17 | self.batch_size = batch_size 18 | self.frequent = frequent 19 | self.init = False 20 | self.tic = 0 21 | self.last_count = 0 22 | self.loss_idx_abs = 0 # remember total index from the first epoch 23 | # self.train_loss_logger = VisdomPlotLogger('line', env='deeplab_duc_dff',opts={'title': 'Train FCNLoss'}) 24 | def __call__(self, param): 25 | """Callback to Show speed.""" 26 | count = param.nbatch 27 | if self.last_count > count: 28 | self.init = False 29 | self.last_count = count 30 | 31 | if self.init: 32 | if count % self.frequent == 0: 33 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 34 | s = '' 35 | if param.eval_metric is not None: 36 | name, value = param.eval_metric.get() 37 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 38 | for n, v in zip(name, value): 39 | s += "%s=%f,\t" % (n, v) 40 | if n == 'FCNLogLoss': 41 | self.loss_idx_abs += count 42 | FCNLogLoss = v 43 | # self.train_loss_logger.log(self.loss_idx_abs,FCNLogLoss) 44 | else: 45 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 46 | 47 | logging.info(s) 48 | print(s) 49 | self.tic = time.time() 50 | else: 51 | self.init = True 52 | self.tic = time.time() 53 | -------------------------------------------------------------------------------- /deeplab/core/metric.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | import numpy as np 13 | 14 | class FCNLogLossMetric(mx.metric.EvalMetric): 15 | def __init__(self, show_interval): 16 | super(FCNLogLossMetric, self).__init__('FCNLogLoss') 17 | 
self.show_interval = show_interval 18 | self.sum_metric = 0 19 | self.num_inst = 0 20 | 21 | def update(self, labels, preds): 22 | pred = preds[0] 23 | label = labels[0] 24 | 25 | # label (b, p) 26 | label = label.asnumpy().astype('int32').reshape((-1)) 27 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 28 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 29 | pred = pred.reshape((label.shape[0], -1)) 30 | 31 | # filter with keep_inds 32 | keep_inds = np.where(label != 255)[0] 33 | label = label[keep_inds] 34 | cls = pred[keep_inds, label] 35 | 36 | cls += 1e-14 37 | cls_loss = -1 * np.log(cls) 38 | cls_loss = np.sum(cls_loss) 39 | 40 | self.sum_metric += cls_loss 41 | self.num_inst += label.shape[0] 42 | -------------------------------------------------------------------------------- /deeplab/core/tester.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import cPickle 12 | import os 13 | import time 14 | import mxnet as mx 15 | import numpy as np 16 | 17 | from PIL import Image 18 | from module import MutableModule 19 | # from config.config import config 20 | from utils import image 21 | from utils.PrefetchingIter import PrefetchingIter 22 | 23 | 24 | class Predictor(object): 25 | def __init__(self, symbol, data_names, label_names, 26 | context=mx.cpu(), max_data_shapes=None, 27 | provide_data=None, provide_label=None, 28 | arg_params=None, aux_params=None): 29 | self._mod = MutableModule(symbol, data_names, label_names, 30 | context=context, max_data_shapes=max_data_shapes) 31 | self._mod.bind(provide_data, provide_label, for_training=False) 32 | self._mod.init_params(arg_params=arg_params, aux_params=aux_params) 33 | 34 | def predict(self, data_batch): 35 | self._mod.forward(data_batch) 36 | # [dict(zip(self._mod.output_names, _)) for _ in zip(*self._mod.get_outputs(merge_multi_context=False))] 37 | return [dict(zip(self._mod.output_names, _)) for _ in zip(*self._mod.get_outputs(merge_multi_context=False))] 38 | 39 | def pred_eval(predictor, test_data, imdb, vis=False, ignore_cache=None, logger=None): 40 | """ 41 | wrapper for calculating offline validation for faster data analysis 42 | in this example, all threshold are set by hand 43 | :param predictor: Predictor 44 | :param test_data: data iterator, must be non-shuffle 45 | :param imdb: image database 46 | :param vis: controls visualization 47 | :param ignore_cache: ignore the saved cache file 48 | :param logger: the logger instance 49 | :return: 50 | """ 51 | res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl') 52 | if os.path.exists(res_file) and not ignore_cache: 53 | with open(res_file , 'rb') as fid: 54 | evaluation_results = cPickle.load(fid) 55 | print 'evaluate segmentation: \n' 56 | if logger: 57 | logger.info('evaluate segmentation: \n') 58 | 59 | meanIU = evaluation_results['meanIU'] 60 | IU_array = evaluation_results['IU_array'] 61 | print 'IU_array:\n' 62 | if logger: 63 | logger.info('IU_array:\n') 64 | for i in range(len(IU_array)): 65 | print '%.5f'%IU_array[i] 66 | if logger: 67 | 
logger.info('%.5f'%IU_array[i]) 68 | print 'meanIU:%.5f'%meanIU 69 | if logger: 70 | logger.info( 'meanIU:%.5f'%meanIU) 71 | return 72 | 73 | assert vis or not test_data.shuffle 74 | if not isinstance(test_data, PrefetchingIter): 75 | test_data = PrefetchingIter(test_data) 76 | 77 | num_images = imdb.num_images 78 | all_segmentation_result = [[] for _ in xrange(num_images)] 79 | idx = 0 80 | 81 | data_time, net_time, post_time = 0.0, 0.0, 0.0 82 | t = time.time() 83 | for data_batch in test_data: 84 | t1 = time.time() - t 85 | t = time.time() 86 | output_all = predictor.predict(data_batch) 87 | output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all] 88 | t2 = time.time() - t 89 | t = time.time() 90 | 91 | all_segmentation_result[idx: idx+test_data.batch_size] = [output.astype('int8') for output in output_all] 92 | 93 | idx += test_data.batch_size 94 | t3 = time.time() - t 95 | t = time.time() 96 | 97 | data_time += t1 98 | net_time += t2 99 | post_time += t3 100 | print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, imdb.num_images, data_time / idx * test_data.batch_size, net_time / idx * test_data.batch_size, post_time / idx * test_data.batch_size) 101 | if logger: 102 | logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, imdb.num_images, data_time / idx * test_data.batch_size, net_time / idx * test_data.batch_size, post_time / idx * test_data.batch_size)) 103 | 104 | evaluation_results = imdb.evaluate_segmentations(all_segmentation_result) 105 | 106 | if not os.path.exists(res_file) or ignore_cache: 107 | with open(res_file, 'wb') as f: 108 | cPickle.dump(evaluation_results, f, protocol=cPickle.HIGHEST_PROTOCOL) 109 | 110 | print 'evaluate segmentation: \n' 111 | if logger: 112 | logger.info('evaluate segmentation: \n') 113 | 114 | meanIU = evaluation_results['meanIU'] 115 | IU_array = evaluation_results['IU_array'] 116 | print 'IU_array:\n' 117 | if logger: 118 | logger.info('IU_array:\n') 119 | for i in range(len(IU_array)): 120 | print '%.5f'%IU_array[i] 121 | if logger: 122 | logger.info('%.5f'%IU_array[i]) 123 | print 'meanIU:%.5f'%meanIU 124 | if logger: 125 | logger.info( 'meanIU:%.5f'%meanIU) 126 | -------------------------------------------------------------------------------- /deeplab/dff_test.py: -------------------------------------------------------------------------------- 1 | 2 | import _init_paths 3 | 4 | import argparse 5 | import os 6 | import sys 7 | import time 8 | import logging 9 | from config.dff_config import config, update_config 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Test a Deeplab Network') 13 | # general 14 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 15 | 16 | args, rest = parser.parse_known_args() 17 | update_config(args.cfg) 18 | 19 | # testing 20 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 21 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 22 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 23 | args = parser.parse_args() 24 | return args 25 | 26 | args = parse_args() 27 | curr_path = os.path.abspath(os.path.dirname(__file__)) 28 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 29 | 30 | import pprint 31 | import mxnet as mx 32 | 33 | from symbols import * 34 | from dataset import * 35 | from core.loader 
import TestDataLoaderVideo 36 | 37 | from core.tester import Predictor, pred_eval 38 | from utils.load_data import load_gt_segdb, merge_segdb 39 | from utils.load_model import load_param 40 | from utils.create_logger import create_logger 41 | 42 | def test_deeplab(): 43 | epoch = config.TEST.test_epoch 44 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 45 | image_set = config.dataset.test_image_set 46 | root_path = config.dataset.root_path 47 | dataset = config.dataset.dataset 48 | dataset_path = config.dataset.dataset_path 49 | 50 | logger, final_output_path = create_logger(config.output_path, args.cfg, image_set) 51 | prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix) 52 | 53 | # print config 54 | pprint.pprint(config) 55 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 56 | 57 | # load symbol and testing data 58 | sym_instance = eval(config.symbol + '.' + config.symbol)() 59 | sym = sym_instance.get_symbol(config, is_train=False) 60 | 61 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path) 62 | segdb = imdb.gt_segdb() 63 | 64 | # get test data iter 65 | test_data = TestDataLoaderVideo(segdb, config=config, batch_size=len(ctx)) 66 | 67 | # infer shape 68 | data_shape_dict = dict(test_data.provide_data_single) 69 | print data_shape_dict 70 | sym_instance.infer_shape(data_shape_dict) 71 | 72 | # load model and check parameters 73 | arg_params, aux_params = load_param(prefix, epoch, process=True) 74 | 75 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 76 | 77 | # decide maximum shape 78 | data_names = [k[0] for k in test_data.provide_data_single] 79 | label_names = ['softmax_label'] 80 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),]] 81 | 82 | # create predictor 83 | predictor = Predictor(sym, data_names, label_names, 84 | context=ctx, max_data_shapes=max_data_shape, 85 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 86 | arg_params=arg_params, aux_params=aux_params) 87 | 88 | # start detection 89 | pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger) 90 | 91 | def main(): 92 | print args 93 | test_deeplab() 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /deeplab/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/function/__init__.py -------------------------------------------------------------------------------- /deeplab/function/reeval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import argparse 12 | import cPickle 13 | import os 14 | import mxnet as mx 15 | 16 | from config.config import config, generate_config 17 | from dataset 
import * 18 | 19 | 20 | def reeval(args): 21 | # load imdb 22 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 23 | 24 | # load detection results 25 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 26 | with open(cache_file) as f: 27 | detections = cPickle.load(f) 28 | 29 | # eval 30 | imdb.evaluate_detections(detections) 31 | 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser(description='imdb test') 35 | # general 36 | parser.add_argument('--network', help='network name', default=default.network, type=str) 37 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 38 | args, rest = parser.parse_known_args() 39 | generate_config(args.network, args.dataset) 40 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 41 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 42 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 43 | # other 44 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def main(): 50 | args = parse_args() 51 | print 'Called with argument:', args 52 | reeval(args) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /deeplab/function/test_deeplab.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import argparse 12 | import pprint 13 | import logging 14 | import time 15 | import os 16 | import mxnet as mx 17 | 18 | from config.config import config, generate_config, update_config 19 | from config.dataset_conf import dataset 20 | from config.network_conf import network 21 | from symbols import * 22 | from dataset import * 23 | from core.loader import TestDataLoader 24 | from core.tester import Predictor, pred_eval 25 | from utils.load_model import load_param 26 | 27 | def test_deeplab(network, dataset, image_set, root_path, dataset_path, 28 | ctx, prefix, epoch, 29 | vis, logger=None, output_path=None): 30 | if not logger: 31 | assert False, 'require a logger' 32 | 33 | # print config 34 | pprint.pprint(config) 35 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 36 | 37 | # load symbol and testing data 38 | sym = eval('get_' + network + '_test')(num_classes=config.dataset.NUM_CLASSES) 39 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 40 | segdb = imdb.gt_segdb() 41 | 42 | # get test data iter 43 | test_data = TestDataLoader(segdb, batch_size=len(ctx)) 44 | 45 | # load model 46 | # arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 47 | arg_params, aux_params = load_param(prefix, epoch, process=True) 48 | 49 | # infer shape 50 | data_shape_dict = dict(test_data.provide_data_single) 51 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 52 | arg_shape_dict = dict(zip(sym.list_arguments(), 
arg_shape)) 53 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 54 | 55 | # check parameters 56 | for k in sym.list_arguments(): 57 | if k in data_shape_dict or k in ['softmax_label']: 58 | continue 59 | assert k in arg_params, k + ' not initialized' 60 | assert arg_params[k].shape == arg_shape_dict[k], \ 61 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 62 | for k in sym.list_auxiliary_states(): 63 | assert k in aux_params, k + ' not initialized' 64 | assert aux_params[k].shape == aux_shape_dict[k], \ 65 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 66 | 67 | # decide maximum shape 68 | data_names = [k[0] for k in test_data.provide_data_single] 69 | label_names = ['softmax_label'] 70 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] 71 | 72 | # create predictor 73 | predictor = Predictor(sym, data_names, label_names, 74 | context=ctx, max_data_shapes=max_data_shape, 75 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 76 | arg_params=arg_params, aux_params=aux_params) 77 | 78 | # start detection 79 | pred_eval(predictor, test_data, imdb, vis=vis, logger=logger) 80 | 81 | -------------------------------------------------------------------------------- /deeplab/symbols/__init__.py: -------------------------------------------------------------------------------- 1 | import resnet_v1_101_deeplab 2 | import resnet_v1_101_deeplab_dcn 3 | import resnet_v1_101_deeplab_video 4 | import resnet_v1_101_deeplab_video_dcn 5 | # Capsule Layer For Segmentation 6 | # import resnet_v1_101_deeplab_capsule 7 | # import densenet_bc_deeplab_base 8 | # import densenet_bc_deeplab_base_v2 -------------------------------------------------------------------------------- /deeplab/symbols/print_summary.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import json 12 | from mxnet import Symbol 13 | def print_summary(symbol, shape=None, line_length=120, positions=[.44, .64, .74, 1.]): 14 | """Print a layer-by-layer summary of the symbol. 15 | 16 | Parameters 17 | ---------- 18 | symbol: Symbol 19 | Symbol to be visualized. 20 | shape: dict 21 | A dict of shapes, str->shape (tuple), given input shapes. 22 | line_length: int 23 | Total length of printed lines 24 | positions: list 25 | Relative or absolute positions of log elements in each line.
26 | Returns 27 | ------ 28 | None 29 | """ 30 | if not isinstance(symbol, Symbol): 31 | raise TypeError("symbol must be Symbol") 32 | show_shape = False 33 | if shape is not None: 34 | show_shape = True 35 | interals = symbol.get_internals() 36 | _, out_shapes, _ = interals.infer_shape(**shape) 37 | if out_shapes is None: 38 | raise ValueError("Input shape is incomplete") 39 | shape_dict = dict(zip(interals.list_outputs(), out_shapes)) 40 | conf = json.loads(symbol.tojson()) 41 | nodes = conf["nodes"] 42 | heads = set(conf["heads"][0]) 43 | if positions[-1] <= 1: 44 | positions = [int(line_length * p) for p in positions] 45 | # header names for the different log elements 46 | to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Previous Layer'] 47 | def print_row(fields, positions): 48 | """Print format row. 49 | 50 | Parameters 51 | ---------- 52 | fields: list 53 | Information field. 54 | positions: list 55 | Field length ratio. 56 | Returns 57 | ------ 58 | None 59 | """ 60 | line = '' 61 | for i, field in enumerate(fields): 62 | line += str(field) 63 | line = line[:positions[i]] 64 | line += ' ' * (positions[i] - len(line)) 65 | print(line) 66 | print('_' * line_length) 67 | print_row(to_display, positions) 68 | print('=' * line_length) 69 | def print_layer_summary(node, out_shape): 70 | """print layer information 71 | 72 | Parameters 73 | ---------- 74 | node: dict 75 | Node information. 76 | out_shape: dict 77 | Node shape information. 78 | Returns 79 | ------ 80 | Node total parameters. 81 | """ 82 | op = node["op"] 83 | pre_node = [] 84 | pre_filter = 0 85 | if op != "null": 86 | inputs = node["inputs"] 87 | for item in inputs: 88 | input_node = nodes[item[0]] 89 | input_name = input_node["name"] 90 | if input_node["op"] != "null" or item[0] in heads: 91 | # add precede 92 | pre_node.append(input_name) 93 | if show_shape: 94 | if input_node["op"] != "null": 95 | key = input_name + "_output" 96 | else: 97 | key = input_name 98 | if key in shape_dict: 99 | shape = shape_dict[key][1:] 100 | pre_filter = pre_filter + int(shape[0]) 101 | cur_param = 0 102 | if op == 'Convolution': 103 | if ("no_bias" in node["attrs"]) and (isinstance(node["attrs"]["no_bias"],(bool,int)) and int(node["attrs"]["no_bias"])) or ((isinstance(node["attrs"]["no_bias"],(str)) and bool(node["attrs"]["no_bias"]))): 104 | cur_param = pre_filter * int(node["attrs"]["num_filter"]) 105 | for k in _str2tuple(node["attrs"]["kernel"]): 106 | cur_param *= int(k) 107 | else: 108 | cur_param = pre_filter * int(node["attrs"]["num_filter"]) 109 | for k in _str2tuple(node["attrs"]["kernel"]): 110 | cur_param *= int(k) 111 | cur_param += int(node["attrs"]["num_filter"]) 112 | elif op == 'FullyConnected': 113 | if ("no_bias" in node["attrs"]) and int(node["attrs"]["no_bias"]): 114 | cur_param = pre_filter * (int(node["attrs"]["num_hidden"])) 115 | else: 116 | cur_param = (pre_filter+1) * (int(node["attrs"]["num_hidden"])) 117 | elif op == 'BatchNorm': 118 | key = node["name"] + "_output" 119 | if show_shape: 120 | num_filter = shape_dict[key][1] 121 | cur_param = int(num_filter) * 2 122 | if not pre_node: 123 | first_connection = '' 124 | else: 125 | first_connection = pre_node[0] 126 | fields = [node['name'] + '(' + op + ')', 127 | "x".join([str(x) for x in out_shape]), 128 | cur_param, 129 | first_connection] 130 | print_row(fields, positions) 131 | if len(pre_node) > 1: 132 | for i in range(1, len(pre_node)): 133 | fields = ['', '', '', pre_node[i]] 134 | print_row(fields, positions) 135 | return cur_param 136 | 
total_params = 0 137 | for i, node in enumerate(nodes): 138 | out_shape = [] 139 | op = node["op"] 140 | if op == "null" and i > 0: 141 | continue 142 | if op != "null" or i in heads: 143 | if show_shape: 144 | if op != "null": 145 | key = node["name"] + "_output" 146 | else: 147 | key = node["name"] 148 | if key in shape_dict: 149 | out_shape = shape_dict[key][1:] 150 | total_params += print_layer_summary(nodes[i], out_shape) 151 | if i == len(nodes) - 1: 152 | print('=' * line_length) 153 | else: 154 | print('_' * line_length) 155 | print('Total params: %s' % total_params) 156 | print('_' * line_length) -------------------------------------------------------------------------------- /deeplab/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # -------------------------------------------------------- 8 | 9 | import _init_paths 10 | 11 | import argparse 12 | import os 13 | import sys 14 | import time 15 | import logging 16 | from config.config import config, update_config 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(description='Test a Deeplab Network') 20 | # general 21 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 22 | 23 | args, rest = parser.parse_known_args() 24 | update_config(args.cfg) 25 | 26 | # testing 27 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 28 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 29 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 30 | args = parser.parse_args() 31 | return args 32 | 33 | args = parse_args() 34 | curr_path = os.path.abspath(os.path.dirname(__file__)) 35 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 36 | 37 | import pprint 38 | import mxnet as mx 39 | 40 | from symbols import * 41 | from dataset import * 42 | from core.loader import TestDataLoader 43 | from core.tester import Predictor, pred_eval 44 | from utils.load_data import load_gt_segdb, merge_segdb 45 | from utils.load_model import load_param 46 | from utils.create_logger import create_logger 47 | 48 | def test_deeplab(): 49 | epoch = config.TEST.test_epoch 50 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 51 | image_set = config.dataset.test_image_set 52 | root_path = config.dataset.root_path 53 | dataset = config.dataset.dataset 54 | dataset_path = config.dataset.dataset_path 55 | 56 | logger, final_output_path = create_logger(config.output_path, args.cfg, image_set) 57 | prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix) 58 | 59 | # print config 60 | pprint.pprint(config) 61 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 62 | 63 | # load symbol and testing data 64 | sym_instance = eval(config.symbol + '.' 
+ config.symbol)() 65 | sym = sym_instance.get_symbol(config, is_train=False) 66 | 67 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path) 68 | segdb = imdb.gt_segdb() 69 | 70 | # get test data iter 71 | test_data = TestDataLoader(segdb, config=config, batch_size=len(ctx)) 72 | 73 | # infer shape 74 | data_shape_dict = dict(test_data.provide_data_single) 75 | sym_instance.infer_shape(data_shape_dict) 76 | 77 | # load model and check parameters 78 | arg_params, aux_params = load_param(prefix, epoch, process=True) 79 | 80 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 81 | 82 | # decide maximum shape 83 | data_names = [k[0] for k in test_data.provide_data_single] 84 | label_names = ['softmax_label'] 85 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] 86 | 87 | # create predictor 88 | predictor = Predictor(sym, data_names, label_names, 89 | context=ctx, max_data_shapes=max_data_shape, 90 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 91 | arg_params=arg_params, aux_params=aux_params) 92 | 93 | # start detection 94 | pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger) 95 | 96 | def main(): 97 | print args 98 | test_deeplab() 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_cityscapes_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab 5 | gpus: '0' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: 
resnet_v1_101_deeplab 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_base" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_capsule.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab_capsule 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.00005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_capsule" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 512 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml: -------------------------------------------------------------------------------- 
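A note on the training schedules used by all of the cfgs in this directory: lr_step lists the learning-rate drop points as (possibly fractional) epochs, e.g. '40.336' with end_epoch: 53, and the train scripts turn them into iteration counts from the dataset size and per-device batch size. A minimal sketch of that conversion, with illustrative names and numbers rather than the project's actual helpers:

    # Hedged sketch: convert fractional-epoch lr_step values (e.g. '40.336'
    # or '40.336, 60.504') into iteration indices for a step LR schedule.
    # num_images, batch_size and begin_epoch would come from the loaded cfg.
    def lr_step_to_iters(lr_step, num_images, batch_size, begin_epoch=0):
        epoch_size = num_images // batch_size        # iterations per epoch
        epochs = [float(e) for e in lr_step.split(',')]
        return [int(e * epoch_size) for e in epochs if e > begin_epoch]

    print(lr_step_to_iters('40.336', num_images=2975, batch_size=4))  # -> [29969]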
1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab_dcn 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_voc12_segmentation_base.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/voc12" 4 | symbol: resnet_v1_101_deeplab 5 | gpus: '0' 6 | SCALES: 7 | - 360 8 | - 600 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 21 14 | dataset: PascalVOC 15 | dataset_path: "./data/VOCdevkit2012/" 16 | image_set: 2012_train_seg 17 | root_path: "./data/" 18 | test_image_set: 2012_val_seg 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: false 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 12 52 | lr: 0.0005 53 | lr_step: '8' 54 | model_prefix: "deeplab_resnet_v1_101_voc12_segmentation_base" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: False 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 12 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_voc12_segmentation_dcn.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/voc12" 4 | symbol: resnet_v1_101_deeplab_dcn 5 | gpus: '0' 6 | SCALES: 7 | - 360 8 | - 600 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 21 14 | dataset: PascalVOC 15 | dataset_path: "./data/VOCdevkit2012/" 16 | image_set: 2012_train_seg 17 | root_path: "./data/" 18 | test_image_set: 2012_val_seg 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: false 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 12 52 | lr: 0.0005 53 | lr_step: '8' 54 | model_prefix: "deeplab_resnet_v1_101_voc12_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: False 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 12 72 | -------------------------------------------------------------------------------- /experiments/deeplab/deeplab_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | import test 20 | 21 | if __name__ == "__main__": 22 | test.main() 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /experiments/deeplab/deeplab_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 
'deeplab')) 18 | 19 | import train 20 | import test 21 | 22 | if __name__ == "__main__": 23 | train.main() 24 | # test.main() 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape_dff_dcn" 4 | symbol: resnet_v1_101_deeplab_video 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape_Video 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_flow: "./model/pretrained_model/flownet" 45 | pretrained_epoch: 0 46 | TRAIN: 47 | warmup: true 48 | warmup_lr: 0.00005 49 | # typically we will use 4000 warmup step for single GPU 50 | warmup_step: 1000 51 | begin_epoch: 0 52 | end_epoch: 72 53 | lr: 0.0005 54 | lr_step: '40.336, 60.504' 55 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_video" 56 | # whether flip image 57 | FLIP: true 58 | # size of images for each device 59 | BATCH_IMAGES: 1 60 | # wheter crop image during training 61 | ENABLE_CROP: True 62 | # scale of cropped image during training 63 | CROP_HEIGHT: 768 64 | CROP_WIDTH: 1024 65 | # whether resume training 66 | RESUME: false 67 | # whether shuffle image 68 | SHUFFLE: true 69 | # tensorboard log dir 70 | TB_DIR: "./output/cityscape/tensorboard" 71 | TEST: 72 | # size of images for each device 73 | BATCH_IMAGES: 1 74 | OFFSET: -1 75 | test_epoch: 72 76 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video_duc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape_dff_dcn" 4 | symbol: resnet_v1_101_deeplab_video_dcn 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape_Video 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_flow: "./model/pretrained_model/flownet" 45 | pretrained_epoch: 0 46 | TRAIN: 47 | warmup: true 48 | warmup_lr: 0.00005 49 | # typically we will use 4000 warmup step for single GPU 50 | warmup_step: 1000 51 | begin_epoch: 0 52 | end_epoch: 72 53 | lr: 0.0005 54 | lr_step: '40.336, 60.504' 55 | model_prefix: 
"deeplab_resnet_v1_101_cityscapes_segmentation_video_dcn" 56 | # whether flip imagep 57 | FLIP: true 58 | # size of images for each device 59 | BATCH_IMAGES: 1 60 | # wheter crop image during training 61 | ENABLE_CROP: True 62 | # scale of cropped image during training 63 | CROP_HEIGHT: 768 64 | CROP_WIDTH: 1024 65 | # whether resume training 66 | RESUME: false 67 | # whether shuffle image 68 | SHUFFLE: true 69 | # tensorboard log dir 70 | TB_DIR: "./output/cityscape/tensorboard" 71 | TEST: 72 | # size of images for each device 73 | BATCH_IMAGES: 1 74 | OFFSET: -3 75 | test_epoch: 64 76 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/deeplab_dff_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | # import dff_train 20 | import dff_test 21 | 22 | if __name__ == "__main__": 23 | # dff_train.main() 24 | dff_test.main() 25 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/deeplab_dff_train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | import dff_train 20 | # import dff_test 21 | 22 | if __name__ == "__main__": 23 | dff_train.main() 24 | # test.main() 25 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_coco_trainval_rcnn_dcn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/coco" 4 | symbol: resnet_v1_101_rcnn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 
| - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rcnn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: false 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: 128 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.1 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_coco_trainval_rcnn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/coco" 4 | symbol: resnet_v1_101_rcnn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 
26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rcnn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: false 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: 128 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.1 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_voc0712_rcnn_dcn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/voc" 4 | symbol: resnet_v1_101_rcnn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | 
RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rcnn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: false 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: 128 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.1 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_voc0712_rcnn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/voc" 4 | symbol: resnet_v1_101_rcnn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 
103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rcnn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: false 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: 128 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.1 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | import os 
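# The environment settings a few lines below are shared by every launcher
# script under experiments/: PYTHONUNBUFFERED=1 flushes log output
# immediately, MXNET_CUDNN_AUTOTUNE_DEFAULT=0 disables cuDNN autotuning so
# per-iteration timing stays stable, and MXNET_ENABLE_GPU_P2P=0 avoids
# peer-to-peer GPU copies, which can hang on some multi-GPU hosts.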
8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | #os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import train_end2end 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_end2end.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import train_rcnn 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_rcnn.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/deform_conv_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: deform_conv_demo 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | 
TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 8 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/deform_psroi_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: deform_psroi_demo 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 
| proposal: selective_search 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 8 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: false 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: false 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_coco_trainval_rfcn_dcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn_dcn/coco" 4 | symbol: resnet_v1_101_rfcn_dcn 5 | gpus: '0,1,2,3,4,5,6,7' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | 
image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rfcn_dcn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_coco_trainval_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/coco" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0,1,2,3,4,5,6,7' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | 
NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rfcn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_voc0712_rfcn_dcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn_dcn/voc" 4 | symbol: resnet_v1_101_rfcn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 
| - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_voc0712_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/voc" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | 
- bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/rfcn_coco_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 20 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - 
bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: false 63 | warmup_lr: 0.00005 64 | # typically we will use 4000 warmup step for single GPU 65 | warmup_step: 1000 66 | begin_epoch: 5 67 | end_epoch: 8 68 | model_prefix: 'e2e' 69 | # whether resume training 70 | RESUME: true 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_alternate_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import 
train_alternate 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_alternate.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | import os 8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | this_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 14 | 15 | import train_end2end 16 | import test 17 | 18 | if __name__ == "__main__": 19 | train_end2end.main() 20 | test.main() 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import train_rfcn 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_rfcn.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ./data 4 | mkdir -p ./output 5 | mkdir -p ./external/mxnet 6 | mkdir -p ./model/pretrained_model 7 | 8 | cd lib/bbox 9 | python setup_linux.py build_ext --inplace 10 | cd ../dataset/pycocotools 11 | python setup_linux.py build_ext --inplace 12 | cd ../../nms 13 | python setup_linux.py build_ext --inplace 14 | cd ../.. 
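# usage, from the repo root: bash init.sh # assumes a Python 2 environment with Cython, numpy and the CUDA toolchain on PATH; # the three build_ext --inplace calls above compile the bbox, pycocotools and # nms extensions next to their sources so the modules under lib/ can import them directly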
15 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Sergey Karayev 7 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | cimport cython 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | DTYPE = np.float 15 | ctypedef np.float_t DTYPE_t 16 | 17 | def bbox_overlaps_cython( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | """ 11 | This file has functions about generating bounding box regression targets 12 | """ 13 | 14 | import numpy as np 15 | 16 | from bbox_transform import bbox_overlaps, bbox_transform 17 | 18 | 19 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 20 | """ 21 | given rois, overlaps, gt labels, compute bounding box regression targets 22 | :param rois: roidb[i]['boxes'] k * 4 23 | :param overlaps: roidb[i]['max_overlaps'] k * 1 24 | :param labels: roidb[i]['max_classes'] k * 1 25 | :return: targets[i][class, dx, dy, dw, dh] k * 5 26 | """ 27 | # Ensure ROIs are floats 28 | rois = rois.astype(np.float, copy=False) 29 | 30 | # Sanity check 31 | if len(rois) != len(overlaps): 32 | print 'bbox regression: this should not happen' 33 | 34 | # Indices of ground-truth ROIs 35 | gt_inds = np.where(overlaps == 1)[0] 36 | if len(gt_inds) == 0: 37 | print 'something wrong : zero ground truth rois' 38 | # Indices of examples for which we try to make predictions 39 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 40 | 41 | # Get IoU overlap between each ex ROI and gt ROI 42 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 43 | 44 | # Find which gt ROI each ex ROI has max overlap with: 45 | # this will be the ex ROI's gt target 46 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 47 | gt_rois = rois[gt_inds[gt_assignment], :] 48 | ex_rois = rois[ex_inds, :] 49 | 50 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 51 | targets[ex_inds, 0] = labels[ex_inds] 52 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 53 | return targets 54 | 55 | 56 | def add_bbox_regression_targets(roidb, cfg): 57 | """ 58 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 59 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 60 | :return: means, standard deviations of targets 61 | """ 62 | print 'add bounding box regression targets' 63 | assert len(roidb) > 0 64 | assert 'max_classes' in roidb[0] 65 | 66 | num_images = len(roidb) 67 | num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1] 68 | 69 | for im_i in range(num_images): 70 | rois = roidb[im_i]['boxes'] 71 | max_overlaps = roidb[im_i]['max_overlaps'] 72 | max_classes = roidb[im_i]['max_classes'] 73 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg) 74 | 75 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 76 | # use fixed / precomputed means and stds instead of empirical values 77 | means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1)) 78 | stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1)) 79 | else: 80 | # compute mean, std values 81 | class_counts = np.zeros((num_classes, 1)) + 1e-14 82 | sums = np.zeros((num_classes, 4)) 83 | squared_sums = np.zeros((num_classes, 4)) 84 | for im_i in range(num_images): 85 | targets = roidb[im_i]['bbox_targets'] 86 | for cls in range(1, num_classes): 87 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 88 | if cls_indexes.size > 0: 89 | class_counts[cls] += cls_indexes.size 90 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 91 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 92 | 93 | means = sums / class_counts 94 | # var(x) = E(x^2) - E(x)^2 95 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 96 | 97 | print 'bbox target means:' 98 | print means 99 | print means[1:, :].mean(axis=0) # ignore bg class 100 | print 'bbox target stdevs:' 101 | print stds 102 | print stds[1:, :].mean(axis=0) # ignore bg class 103 | 104 | 105 | # normalize targets 106 | for im_i in range(num_images): 107 | targets = roidb[im_i]['bbox_targets'] 108 | for cls in range(1, num_classes): 109 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 110 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 111 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 112 | 113 | return means.ravel(), stds.ravel() 114 | 115 | 116 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg): 117 | """ 118 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 119 | :param bbox_targets_data: [k * 5] 120 | :param num_classes: number of classes 121 | :return: bbox target processed [k, 4 * num_classes] 122 | bbox_weights: only foreground boxes have non-zero bbox regression weights
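example (illustrative, with cfg.CLASS_AGNOSTIC false and num_classes = 3): a row [2, dx, dy, dw, dh] fills bbox_targets[i, 8:12] with [dx, dy, dw, dh] and bbox_weights[i, 8:12] with cfg.TRAIN.BBOX_WEIGHTS; every other column stays zero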
123 | """ 124 | classes = bbox_targets_data[:, 0] 125 | if cfg.CLASS_AGNOSTIC: 126 | num_classes = 2 127 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 128 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 129 | indexes = np.where(classes > 0)[0] 130 | for index in indexes: 131 | cls = classes[index] 132 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 133 | end = start + 4 134 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 135 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 136 | return bbox_targets, bbox_weights 137 | 138 | -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from bbox import bbox_overlaps_cython 3 | 4 | 5 | def bbox_overlaps(boxes, query_boxes): 6 | return bbox_overlaps_cython(boxes, query_boxes) 7 | 8 | 9 | def bbox_overlaps_py(boxes, query_boxes): 10 | """ 11 | determine overlaps between boxes and query_boxes 12 | :param boxes: n * 4 bounding boxes 13 | :param query_boxes: k * 4 bounding boxes 14 | :return: overlaps: n * k overlaps 15 | """ 16 | n_ = boxes.shape[0] 17 | k_ = query_boxes.shape[0] 18 | overlaps = np.zeros((n_, k_), dtype=np.float) 19 | for k in range(k_): 20 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 23 | if iw > 0: 24 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 25 | if ih > 0: 26 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 27 | all_area = float(box_area + query_box_area - iw * ih) 28 | overlaps[n, k] = iw * ih / all_area 29 | return overlaps 30 | 31 | 32 | def clip_boxes(boxes, im_shape): 33 | """ 34 | Clip boxes to image boundaries. 35 | :param boxes: [N, 4* num_classes] 36 | :param im_shape: tuple of 2 37 | :return: [N, 4* num_classes] 38 | """ 39 | # x1 >= 0 40 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 41 | # y1 >= 0 42 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 43 | # x2 < im_shape[1] 44 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 45 | # y2 < im_shape[0] 46 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 47 | return boxes 48 | 49 | def filter_boxes(boxes, min_size): 50 | """ 51 | filter small boxes. 
52 | :param boxes: [N, 4* num_classes] 53 | :param min_size: 54 | :return: keep: 55 | """ 56 | ws = boxes[:, 2] - boxes[:, 0] + 1 57 | hs = boxes[:, 3] - boxes[:, 1] + 1 58 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 59 | return keep 60 | 61 | def nonlinear_transform(ex_rois, gt_rois): 62 | """ 63 | compute bounding box regression targets from ex_rois to gt_rois 64 | :param ex_rois: [N, 4] 65 | :param gt_rois: [N, 4] 66 | :return: [N, 4] 67 | """ 68 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 69 | 70 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 71 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 72 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 73 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 74 | 75 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 76 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 77 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 78 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 79 | 80 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 81 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 82 | targets_dw = np.log(gt_widths / ex_widths) 83 | targets_dh = np.log(gt_heights / ex_heights) 84 | 85 | targets = np.vstack( 86 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 87 | return targets 88 | 89 | 90 | def nonlinear_pred(boxes, box_deltas): 91 | """ 92 | Transform the set of class-agnostic boxes into class-specific boxes 93 | by applying the predicted offsets (box_deltas) 94 | :param boxes: !important [N 4] 95 | :param box_deltas: [N, 4 * num_classes] 96 | :return: [N 4 * num_classes] 97 | """ 98 | if boxes.shape[0] == 0: 99 | return np.zeros((0, box_deltas.shape[1])) 100 | 101 | boxes = boxes.astype(np.float, copy=False) 102 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 103 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 104 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 105 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 106 | 107 | dx = box_deltas[:, 0::4] 108 | dy = box_deltas[:, 1::4] 109 | dw = box_deltas[:, 2::4] 110 | dh = box_deltas[:, 3::4] 111 | 112 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 113 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 114 | pred_w = np.exp(dw) * widths[:, np.newaxis] 115 | pred_h = np.exp(dh) * heights[:, np.newaxis] 116 | 117 | pred_boxes = np.zeros(box_deltas.shape) 118 | # x1 119 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 120 | # y1 121 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 122 | # x2 123 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 124 | # y2 125 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 126 | 127 | return pred_boxes 128 | 129 | 130 | def iou_transform(ex_rois, gt_rois): 131 | """ return bbox targets, IoU loss uses gt_rois as gt """ 132 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 133 | return gt_rois 134 | 135 | 136 | def iou_pred(boxes, box_deltas): 137 | """ 138 | Transform the set of class-agnostic boxes into class-specific boxes 139 | by applying the predicted offsets (box_deltas) 140 | :param boxes: !important [N 4] 141 | :param box_deltas: [N, 4 * num_classes] 142 | :return: [N 4 * num_classes] 143 | """ 144 | if boxes.shape[0] == 0: 145 | return np.zeros((0, box_deltas.shape[1])) 146 | 147 | boxes = boxes.astype(np.float, copy=False) 148 | x1 = boxes[:, 0] 149 | y1 = boxes[:, 1] 150 | x2 = boxes[:, 2] 151 | y2 = boxes[:, 3] 152 | 153 | dx1 = box_deltas[:, 0::4] 154 | dy1 = box_deltas[:, 1::4] 
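# (the strided slices 0::4 ... 3::4 pick the x1/y1/x2/y2 delta columns for every class at once, matching the [N, 4 * num_classes] layout)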
155 | dx2 = box_deltas[:, 2::4] 156 | dy2 = box_deltas[:, 3::4] 157 | 158 | pred_boxes = np.zeros(box_deltas.shape) 159 | # x1 160 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 161 | # y1 162 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 163 | # x2 164 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 165 | # y2 166 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 167 | 168 | return pred_boxes 169 | 170 | 171 | # define bbox_transform and bbox_pred 172 | bbox_transform = nonlinear_transform 173 | bbox_pred = nonlinear_pred 174 | -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | import os 11 | from os.path import join as pjoin 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import numpy as np 16 | 17 | # Obtain the numpy include directory. This logic works across numpy versions. 18 | try: 19 | numpy_include = np.get_include() 20 | except AttributeError: 21 | numpy_include = np.get_numpy_include() 22 | 23 | 24 | def customize_compiler_for_nvcc(self): 25 | """inject deep into distutils to customize how the dispatch 26 | to gcc/nvcc works. 27 | If you subclass UnixCCompiler, it's not trivial to get your subclass 28 | injected in, and still have the right customizations (i.e. 29 | distutils.sysconfig.customize_compiler) run on it. So instead of going 30 | the OO route, I have this. Note, it's kind of like a weird functional 31 | subclassing going on.""" 32 | 33 | # tell the compiler it can process .cu 34 | self.src_extensions.append('.cu') 35 | 36 | # save references to the default compiler_so and _compile methods 37 | default_compiler_so = self.compiler_so 38 | super = self._compile 39 | 40 | # now redefine the _compile method. This gets executed for each 41 | # object but distutils doesn't have the ability to change compilers 42 | # based on source extension: we add it.
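# note: unlike lib/nms/setup_linux.py, this file never defines CUDA, so the '.cu' branch below is dead code for bbox.pyx; only the 'gcc' post-args are ever used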
43 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 44 | if os.path.splitext(src)[1] == '.cu': 45 | # use the cuda for .cu files 46 | self.set_executable('compiler_so', CUDA['nvcc']) 47 | # use only a subset of the extra_postargs, which are 1-1 translated 48 | # from the extra_compile_args in the Extension class 49 | postargs = extra_postargs['nvcc'] 50 | else: 51 | postargs = extra_postargs['gcc'] 52 | 53 | super(obj, src, ext, cc_args, postargs, pp_opts) 54 | # reset the default compiler_so, which we might have changed for cuda 55 | self.compiler_so = default_compiler_so 56 | 57 | # inject our redefined _compile method into the class 58 | self._compile = _compile 59 | 60 | 61 | # run the customize_compiler 62 | class custom_build_ext(build_ext): 63 | def build_extensions(self): 64 | customize_compiler_for_nvcc(self.compiler) 65 | build_ext.build_extensions(self) 66 | 67 | 68 | ext_modules = [ 69 | Extension( 70 | "bbox", 71 | ["bbox.pyx"], 72 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 73 | include_dirs=[numpy_include] 74 | ), 75 | ] 76 | 77 | setup( 78 | name='bbox_cython', 79 | ext_modules=ext_modules, 80 | # inject our custom trigger 81 | cmdclass={'build_ext': custom_build_ext}, 82 | ) 83 | -------------------------------------------------------------------------------- /lib/bbox/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import os 11 | from os.path import join as pjoin 12 | #from distutils.core import setup 13 | from setuptools import setup 14 | from distutils.extension import Extension 15 | from Cython.Distutils import build_ext 16 | import subprocess 17 | 18 | #change for windows, by MrX 19 | nvcc_bin = 'nvcc.exe' 20 | lib_dir = 'lib/x64' 21 | 22 | import distutils.msvc9compiler 23 | distutils.msvc9compiler.VERSION = 14.0 24 | 25 | # Obtain the numpy include directory. This logic works across numpy versions. 
26 | try: 27 | numpy_include = np.get_include() 28 | except AttributeError: 29 | numpy_include = np.get_numpy_include() 30 | 31 | ext_modules = [ 32 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 33 | Extension( 34 | "bbox", 35 | sources=["bbox.pyx"], 36 | extra_compile_args={}, 37 | include_dirs = [numpy_include] 38 | ), 39 | ] 40 | 41 | setup( 42 | name='fast_rcnn', 43 | ext_modules=ext_modules, 44 | # inject our custom trigger 45 | cmdclass={'build_ext': build_ext}, 46 | ) 47 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from imdb import IMDB 2 | from pascal_voc import PascalVOC 3 | from cityscape import CityScape 4 | from cityscape_video import CityScape_Video 5 | from coco import coco 6 | -------------------------------------------------------------------------------- /lib/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h > min_size))[0] 16 | return keep -------------------------------------------------------------------------------- /lib/dataset/pycocotools/.gitignore: -------------------------------------------------------------------------------- 1 | _mask.c 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import _mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). 
Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | #decode = _mask.decode 78 | def decode(rleObjs): 79 | if type(rleObjs) == list: 80 | return _mask.decode(rleObjs) 81 | else: 82 | return _mask.decode([rleObjs])[:,:,0] 83 | iou = _mask.iou 84 | merge = _mask.merge 85 | area = _mask.area 86 | toBbox = _mask.toBbox 87 | frPyObjects = _mask.frPyObjects 88 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_linux.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_windows.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | import distutils.msvc9compiler 7 | distutils.msvc9compiler.VERSION = 14.0 8 | 9 | 10 | # To compile and install locally run "python setup.py build_ext --inplace" 11 | # To install library to Python site-packages run "python setup.py build_ext install" 12 | 13 | ext_modules = [ 14 | Extension( 15 | '_mask', 16 | sources=['maskApi.c', '_mask.pyx'], 17 | include_dirs=[np.get_include()], 18 | extra_compile_args=[], 19 | ) 20 | ] 21 | 22 | setup(name='pycocotools', 23 | ext_modules=cythonize(ext_modules) 24 | ) 25 | -------------------------------------------------------------------------------- /lib/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/logger/__init__.py -------------------------------------------------------------------------------- /lib/logger/logger.py: -------------------------------------------------------------------------------- 1 | """ Logging values to various sinks """ 2 | 3 | class Logger(object): 4 | _fields = None 5 | 6 | @property 7 | def fields(self): 8 | assert self._fields is not None, "self.fields is not set!" 
9 | return self._fields 10 | 11 | @fields.setter 12 | def fields(self, value): 13 | self._fields = value 14 | 15 | def __init__(self, fields=None): 16 | """ Automatically logs the variables in 'fields' """ 17 | self.fields = fields 18 | 19 | def log(self, *args, **kwargs): 20 | pass 21 | 22 | def log_state(self, state_dict): 23 | pass -------------------------------------------------------------------------------- /lib/logger/readme.md: -------------------------------------------------------------------------------- 1 | This folder was copied from https://github.com/pytorch/tnt/blob/master/torchnet/logger -------------------------------------------------------------------------------- /lib/mask/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/mask/__init__.py -------------------------------------------------------------------------------- /lib/mask/mask_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Haozhi Qi, Yi Li, Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def intersect_box_mask(ex_box, gt_box, gt_mask): 12 | """ 13 | This function calculates the intersection of an external box 14 | and gt_box, and masks it according to gt_mask 15 | Args: 16 | ex_box: external ROIs 17 | gt_box: ground truth boxes 18 | gt_mask: ground truth masks, not been resized yet 19 | Returns: 20 | regression_target: logical numpy array 21 | """ 22 | x1 = max(ex_box[0], gt_box[0]) 23 | y1 = max(ex_box[1], gt_box[1]) 24 | x2 = min(ex_box[2], gt_box[2]) 25 | y2 = min(ex_box[3], gt_box[3]) 26 | if x1 > x2 or y1 > y2: 27 | return np.zeros((21, 21), dtype=bool) 28 | w = x2 - x1 + 1 29 | h = y2 - y1 + 1 30 | ex_starty = y1 - ex_box[1] 31 | ex_startx = x1 - ex_box[0] 32 | 33 | inter_maskb = gt_mask[y1:y2+1, x1:x2+1] 34 | regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1)) 35 | regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb 36 | 37 | return regression_target 38 | 39 | 40 | def mask_overlap(box1, box2, mask1, mask2): 41 | """ 42 | This function calculates the region IoU when the masks are 43 | inside different boxes 44 | Returns: 45 | intersection over union of these two masks 46 | """ 47 | x1 = max(box1[0], box2[0]) 48 | y1 = max(box1[1], box2[1]) 49 | x2 = min(box1[2], box2[2]) 50 | y2 = min(box1[3], box2[3]) 51 | if x1 > x2 or y1 > y2: 52 | return 0 53 | w = x2 - x1 + 1 54 | h = y2 - y1 + 1 55 | # get masks in the intersection part 56 | start_ya = y1 - box1[1] 57 | start_xa = x1 - box1[0] 58 | inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w] 59 | 60 | start_yb = y1 - box2[1] 61 | start_xb = x1 - box2[0] 62 | inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w] 63 | 64 | assert inter_maska.shape == inter_maskb.shape 65 | 66 | inter = np.logical_and(inter_maskb, inter_maska).sum() 67 | union = mask1.sum() + mask2.sum() - inter 68 | if union < 1.0: 69 | return 0 70 | return float(inter) / float(union) 71 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # 
-------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int32_t, ndim=1] \ 26 | order = scores.argsort()[::-1].astype(np.int32) 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cpu_nms import cpu_nms 4 | from gpu_nms import gpu_nms 5 | 6 | def py_nms_wrapper(thresh): 7 | def _nms(dets): 8 | return nms(dets, thresh) 9 | return _nms 10 | 11 | 12 | def cpu_nms_wrapper(thresh): 13 | def _nms(dets): 14 | return cpu_nms(dets, thresh) 15 | return _nms 16 | 17 | 18 | def gpu_nms_wrapper(thresh, device_id): 19 | def _nms(dets): 20 | return gpu_nms(dets, thresh, device_id) 21 | return _nms 22 | 23 | 24 | def nms(dets, thresh): 25 | """ 26 | greedily select boxes with high confidence and overlap with current maximum <= thresh 27 | rule out overlap >= thresh 28 | :param dets: [[x1, y1, x2, y2, score]] 29 | :param thresh: retain overlap < thresh 30 | :return: indexes to keep 31 | """ 32 | if dets.shape[0] == 0: 33 | return [] 34 | 35 | x1 = dets[:, 0] 36 | y1 = dets[:, 1] 37 | x2 = dets[:, 2] 38 | y2 = dets[:, 3] 39 | scores = dets[:, 4] 40 | 41 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 42 | order = scores.argsort()[::-1] 43 | 44 | keep = [] 45 | while order.size > 0: 46 | i = order[0] 47 | keep.append(i) 48 | xx1 = np.maximum(x1[i], x1[order[1:]]) 49 | yy1 = np.maximum(y1[i], y1[order[1:]]) 50 | xx2 = np.minimum(x2[i], x2[order[1:]]) 51 | yy2 = np.minimum(y2[i], y2[order[1:]]) 52 | 53 | w = np.maximum(0.0, xx2 - xx1 + 1) 54 | h = np.maximum(0.0, yy2 - yy1 + 1) 55 | inter = w * h 56 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 57 | 58 | inds = np.where(ovr <= thresh)[0] 59 | order = order[inds + 1] 60 | 61 | return keep 62 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deformable Convolutional Networks 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License 5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) 
((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH.
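Example (illustrative; the paths are machine-dependent): a typical return value is {'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc', 'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.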
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.iteritems(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc. The implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/nms/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import os 10 | from os.path import join as pjoin 11 | #from distutils.core import setup 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import subprocess 16 | 17 | #change for windows, by MrX 18 | nvcc_bin = 'nvcc.exe' 19 | lib_dir = 'lib/x64' 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | 25 | def find_in_path(name, path): 26 | "Find a file in a search path" 27 | # Adapted from 28 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 29 | for dir in path.split(os.pathsep): 30 | binpath = pjoin(dir, name) 31 | if os.path.exists(binpath): 32 | return os.path.abspath(binpath) 33 | return None 34 | 35 | 36 | def locate_cuda(): 37 | """Locate the CUDA environment on the system 38 | 39 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 40 | and values giving the absolute path to each directory. 41 | 42 | Starts by looking for the CUDA_PATH env variable.
If not found, everything 43 | is based on finding 'nvcc' in the PATH. 44 | """ 45 | 46 | # first check if the CUDA_PATH env variable is in use 47 | if 'CUDA_PATH' in os.environ: 48 | home = os.environ['CUDA_PATH'] 49 | print("home = %s\n" % home) 50 | nvcc = pjoin(home, 'bin', nvcc_bin) 51 | else: 52 | # otherwise, search the PATH for NVCC 53 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 54 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path) 55 | if nvcc is None: 56 | raise EnvironmentError('The nvcc binary could not be ' 57 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') 58 | home = os.path.dirname(os.path.dirname(nvcc)) 59 | print("home = %s, nvcc = %s\n" % (home, nvcc)) 60 | 61 | 62 | cudaconfig = {'home':home, 'nvcc':nvcc, 63 | 'include': pjoin(home, 'include'), 64 | 'lib64': pjoin(home, lib_dir)} 65 | for k, v in cudaconfig.iteritems(): 66 | if not os.path.exists(v): 67 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 68 | 69 | return cudaconfig 70 | CUDA = locate_cuda() 71 | 72 | 73 | # Obtain the numpy include directory. This logic works across numpy versions. 74 | try: 75 | numpy_include = np.get_include() 76 | except AttributeError: 77 | numpy_include = np.get_numpy_include() 78 | 79 | 80 | def customize_compiler_for_nvcc(self): 81 | """inject deep into distutils to customize how the dispatch 82 | to gcc/nvcc works. 83 | 84 | If you subclass UnixCCompiler, it's not trivial to get your subclass 85 | injected in, and still have the right customizations (i.e. 86 | distutils.sysconfig.customize_compiler) run on it. So instead of going 87 | the OO route, I have this. Note, it's kind of like a weird functional 88 | subclassing going on.""" 89 | 90 | # tell the compiler it can process .cu 91 | #self.src_extensions.append('.cu') 92 | 93 | 94 | # save references to the default compiler_so and _compile methods 95 | #default_compiler_so = self.spawn 96 | #default_compiler_so = self.rc 97 | super = self.compile 98 | 99 | # now redefine the _compile method. This gets executed for each 100 | # object but distutils doesn't have the ability to change compilers 101 | # based on source extension: we add it.
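# unlike the linux variant, MSVC exposes no per-object _compile hook, so the batch-level compile(sources, ...) method is wrapped here instead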
102 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 103 | postfix=os.path.splitext(sources[0])[1] 104 | 105 | if postfix == '.cu': 106 | # use the cuda for .cu files 107 | #self.set_executable('compiler_so', CUDA['nvcc']) 108 | # use only a subset of the extra_postargs, which are 1-1 translated 109 | # from the extra_compile_args in the Extension class 110 | postargs = extra_postargs['nvcc'] 111 | else: 112 | postargs = extra_postargs['gcc'] 113 | 114 | 115 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 116 | # reset the default compiler_so, which we might have changed for cuda 117 | #self.rc = default_compiler_so 118 | 119 | # inject our redefined _compile method into the class 120 | self.compile = compile 121 | 122 | 123 | # run the customize_compiler 124 | class custom_build_ext(build_ext): 125 | def build_extensions(self): 126 | customize_compiler_for_nvcc(self.compiler) 127 | build_ext.build_extensions(self) 128 | 129 | 130 | ext_modules = [ 131 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 132 | Extension( 133 | "cpu_nms", 134 | sources=["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': []}, 136 | include_dirs = [numpy_include], 137 | ), 138 | ] 139 | 140 | setup( 141 | name='fast_rcnn', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 9 | scales=2 ** np.arange(3, 6)): 10 | """ 11 | Generate anchor (reference) windows by enumerating aspect ratios X 12 | scales wrt a reference (0, 0, 15, 15) window. 13 | """ 14 | 15 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 16 | ratio_anchors = _ratio_enum(base_anchor, ratios) 17 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 18 | for i in xrange(ratio_anchors.shape[0])]) 19 | return anchors 20 | 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | 34 | def _mkanchors(ws, hs, x_ctr, y_ctr): 35 | """ 36 | Given a vector of widths (ws) and heights (hs) around a center 37 | (x_ctr, y_ctr), output a set of anchors (windows). 38 | """ 39 | 40 | ws = ws[:, np.newaxis] 41 | hs = hs[:, np.newaxis] 42 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 43 | y_ctr - 0.5 * (hs - 1), 44 | x_ctr + 0.5 * (ws - 1), 45 | y_ctr + 0.5 * (hs - 1))) 46 | return anchors 47 | 48 | 49 | def _ratio_enum(anchor, ratios): 50 | """ 51 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
52 | """ 53 | 54 | w, h, x_ctr, y_ctr = _whctrs(anchor) 55 | size = w * h 56 | size_ratios = size / ratios 57 | ws = np.round(np.sqrt(size_ratios)) 58 | hs = np.round(ws * ratios) 59 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 60 | return anchors 61 | 62 | 63 | def _scale_enum(anchor, scales): 64 | """ 65 | Enumerate a set of anchors for each scale wrt an anchor. 66 | """ 67 | 68 | w, h, x_ctr, y_ctr = _whctrs(anchor) 69 | ws = w * scales 70 | hs = h * scales 71 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 72 | return anchors 73 | -------------------------------------------------------------------------------- /lib/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/segmentation/__init__.py -------------------------------------------------------------------------------- /lib/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | """ 12 | Segmentation: 13 | data = 14 | {'data': [num_images, c, h, w], 15 | 'im_info': [num_images, 4] (optional)} 16 | label = 17 | {'label': [batch_size, 1] <- [batch_size, c, h, w]} 18 | """ 19 | 20 | import numpy as np 21 | from utils.image import get_segmentation_image, tensor_vstack 22 | from utils.image import get_segmentation_image_video 23 | 24 | def get_segmentation_test_batch(segdb, config): 25 | """ 26 | return a dict of train batch 27 | :param segdb: ['image', 'flipped'] 28 | :param config: the config setting 29 | :return: data, label, im_info 30 | """ 31 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 32 | im_array = imgs 33 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 34 | 35 | data = [{'data': im_array[i], 36 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 37 | label = [{'label':seg_cls_gts[i]} for i in xrange(len(segdb))] 38 | 39 | return data, label, im_info 40 | 41 | def get_segmentation_train_batch(segdb, config): 42 | """ 43 | return a dict of train batch 44 | :param segdb: ['image', 'flipped'] 45 | :param config: the config setting 46 | :return: data, label, im_info 47 | """ 48 | # assert len(segdb) == 1, 'Single batch only' 49 | assert len(segdb) == 1, 'Single batch only' 50 | 51 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 52 | im_array = imgs[0] 53 | seg_cls_gt = seg_cls_gts[0] 54 | 55 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 56 | 57 | data = {'data': im_array, 58 | 'im_info': im_info} 59 | label = {'label': seg_cls_gt} 60 | 61 | return data, label 62 | 63 | def get_segmentation_test_batch_video(segdb, config): 64 | """ 65 | return a dict of train batch 66 | :param segdb: ['image', 'flipped'] 67 | :param config: the config setting 68 | :return: data, label, im_info 69 | """ 70 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config,is_train=False) 71 | im_array = imgs 72 | ref_im_array = ref_imgs 73 | eq_flag_array = eq_flags 74 | 75 | im_info = 
-------------------------------------------------------------------------------- /lib/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/segmentation/__init__.py -------------------------------------------------------------------------------- /lib/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | """ 12 | Segmentation: 13 | data = 14 | {'data': [num_images, c, h, w], 15 | 'im_info': [num_images, 4] (optional)} 16 | label = 17 | {'label': [batch_size, 1] <- [batch_size, c, h, w]} 18 | """ 19 | 20 | import numpy as np 21 | from utils.image import get_segmentation_image, tensor_vstack 22 | from utils.image import get_segmentation_image_video 23 | 24 | def get_segmentation_test_batch(segdb, config): 25 | """ 26 | return a dict of test batch 27 | :param segdb: ['image', 'flipped'] 28 | :param config: the config setting 29 | :return: data, label, im_info 30 | """ 31 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 32 | im_array = imgs 33 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 34 | 35 | data = [{'data': im_array[i], 36 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 37 | label = [{'label':seg_cls_gts[i]} for i in xrange(len(segdb))] 38 | 39 | return data, label, im_info 40 | 41 | def get_segmentation_train_batch(segdb, config): 42 | """ 43 | return a dict of train batch 44 | :param segdb: ['image', 'flipped'] 45 | :param config: the config setting 46 | :return: data, label, im_info 47 | """ 48 | # assert len(segdb) == 1, 'Single batch only' 49 | assert len(segdb) == 1, 'Single batch only' 50 | 51 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 52 | im_array = imgs[0] 53 | seg_cls_gt = seg_cls_gts[0] 54 | 55 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 56 | 57 | data = {'data': im_array, 58 | 'im_info': im_info} 59 | label = {'label': seg_cls_gt} 60 | 61 | return data, label 62 | 63 | def get_segmentation_test_batch_video(segdb, config): 64 | """ 65 | return a dict of test batch 66 | :param segdb: ['image', 'flipped'] 67 | :param config: the config setting 68 | :return: data, label, im_info 69 | """ 70 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config, is_train=False) 71 | im_array = imgs 72 | ref_im_array = ref_imgs 73 | eq_flag_array = eq_flags 74 | 75 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 76 | eq_flag_array = [np.array([eq_flags[i],], dtype=np.float32) for i in xrange(len(segdb))] 77 | 78 | data = [{'data':im_array[i], 79 | 'data_ref': ref_im_array[i], 80 | 'eq_flag': eq_flag_array[i], 81 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 82 | label = [{'label': seg_cls_gts[i]} for i in xrange(len(segdb))] 83 | 84 | return data, label, im_info 85 | 86 | def get_segmentation_train_batch_video(segdb, config): 87 | """ 88 | return a dict of train batch 89 | :param segdb: ['image', 'flipped'] 90 | :param config: the config setting 91 | :return: data, label, im_info 92 | """ 93 | # assert len(segdb) == 1, 'Single batch only' 94 | assert len(segdb) == 1, 'Single batch only' 95 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config) 96 | im_array = imgs[0] 97 | ref_im_array = ref_imgs[0] 98 | eq_flag_array = np.array([eq_flags[0],], dtype=np.float32) 99 | seg_cls_gt = seg_cls_gts[0] 100 | 101 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 102 | 103 | data = {'data': im_array, 104 | 'data_ref': ref_im_array, 105 | 'eq_flag': eq_flag_array, 106 | 'im_info': im_info} 107 | label = {'label': seg_cls_gt} 108 | return data, label 109 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | 10 | import mxnet as mx 11 | from mxnet.io import DataDesc, DataBatch 12 | import threading 13 | 14 | 15 | class PrefetchingIter(mx.io.DataIter): 16 | """Base class for prefetching iterators. Takes one or more DataIters ( 17 | or any class with "reset" and "next" methods) and combines them with 18 | prefetching. 19 | 20 | Parameters 21 | ---------- 22 | iters : DataIter or list of DataIter 23 | one or more DataIters (or any class with "reset" and "next" methods) 24 | rename_data : None or list of dict 25 | i-th element is a renaming map for i-th iter, in the form of 26 | {'original_name' : 'new_name'}.
Should have one entry for each entry 27 | in iter[i].provide_data 28 | rename_label : None or list of dict 29 | Similar to rename_data 30 | 31 | Examples 32 | -------- 33 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 34 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 35 | """ 36 | def __init__(self, iters, rename_data=None, rename_label=None): 37 | super(PrefetchingIter, self).__init__() 38 | if not isinstance(iters, list): 39 | iters = [iters] 40 | self.n_iter = len(iters) 41 | assert self.n_iter ==1, "Our prefetching iter only support 1 DataIter" 42 | self.iters = iters 43 | self.rename_data = rename_data 44 | self.rename_label = rename_label 45 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 46 | self.data_ready = [threading.Event() for i in range(self.n_iter)] 47 | self.data_taken = [threading.Event() for i in range(self.n_iter)] 48 | for e in self.data_taken: 49 | e.set() 50 | self.started = True 51 | self.current_batch = [None for _ in range(self.n_iter)] 52 | self.next_batch = [None for _ in range(self.n_iter)] 53 | def prefetch_func(self, i): 54 | """Thread entry""" 55 | while True: 56 | self.data_taken[i].wait() 57 | if not self.started: 58 | break 59 | try: 60 | self.next_batch[i] = self.iters[i].next() 61 | except StopIteration: 62 | self.next_batch[i] = None 63 | self.data_taken[i].clear() 64 | self.data_ready[i].set() 65 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 66 | for i in range(self.n_iter)] 67 | for thread in self.prefetch_threads: 68 | thread.setDaemon(True) 69 | thread.start() 70 | 71 | def __del__(self): 72 | self.started = False 73 | for e in self.data_taken: 74 | e.set() 75 | for thread in self.prefetch_threads: 76 | thread.join() 77 | 78 | @property 79 | def provide_data(self): 80 | """The name and shape of data provided by this iterator""" 81 | if self.rename_data is None: 82 | return sum([i.provide_data for i in self.iters], []) 83 | else: 84 | return sum([[ 85 | DataDesc(r[x.name], x.shape, x.dtype) 86 | if isinstance(x, DataDesc) else DataDesc(*x) 87 | for x in i.provide_data 88 | ] for r, i in zip(self.rename_data, self.iters)], []) 89 | 90 | @property 91 | def provide_label(self): 92 | """The name and shape of label provided by this iterator""" 93 | if self.rename_label is None: 94 | return sum([i.provide_label for i in self.iters], []) 95 | else: 96 | return sum([[ 97 | DataDesc(r[x.name], x.shape, x.dtype) 98 | if isinstance(x, DataDesc) else DataDesc(*x) 99 | for x in i.provide_label 100 | ] for r, i in zip(self.rename_label, self.iters)], []) 101 | 102 | def reset(self): 103 | for e in self.data_ready: 104 | e.wait() 105 | for i in self.iters: 106 | i.reset() 107 | for e in self.data_ready: 108 | e.clear() 109 | for e in self.data_taken: 110 | e.set() 111 | 112 | def iter_next(self): 113 | for e in self.data_ready: 114 | e.wait() 115 | if self.next_batch[0] is None: 116 | return False 117 | else: 118 | self.current_batch = self.next_batch[0] 119 | for e in self.data_ready: 120 | e.clear() 121 | for e in self.data_taken: 122 | e.set() 123 | return True 124 | 125 | def next(self): 126 | if self.iter_next(): 127 | return self.current_batch 128 | else: 129 | raise StopIteration 130 | 131 | def getdata(self): 132 | return self.current_batch.data 133 | 134 | def getlabel(self): 135 | return self.current_batch.label 136 | 137 | def getindex(self): 138 | return self.current_batch.index 139 | 140 | def getpad(self): 141 | return self.current_batch.pad 142 | 
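Note: the data_taken/data_ready events implement a double buffer between the consumer and one worker thread per iter: the consumer sets data_taken so the worker may fetch batch i+1 while batch i is still in use, and the worker sets data_ready once the batch has arrived. A minimal usage sketch (train_iter stands for one of this repo's loaders, e.g. from deeplab/core/loader.py, whose provide_data is nested per device, which is what the batch_size line above expects):

    prefetched = PrefetchingIter(train_iter)
    for batch in prefetched:   # batch i+1 loads while batch i is consumed
        pass                   # forward/backward would go here
    prefetched.reset()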
-------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | from load_model import load_checkpoint 12 | from save_model import save_checkpoint 13 | 14 | 15 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 16 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 17 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 18 | arg_names = args1.keys() + args2.keys() 19 | aux_names = auxs1.keys() + auxs2.keys() 20 | args = dict() 21 | for arg in arg_names: 22 | if arg in args1: 23 | args[arg] = args1[arg] 24 | if arg in args2: 25 | args[arg] = args2[arg] 26 | auxs = dict() 27 | for aux in aux_names: 28 | if aux in auxs1: 29 | auxs[aux] = auxs1[aux] 30 | if aux in auxs2: 31 | auxs[aux] = auxs2[aux] 32 | save_checkpoint(prefix_out, epoch_out, args, auxs) 33 | -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Bin Xiao 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import logging 13 | import time 14 | 15 | def create_logger(root_output_path, cfg, image_set): 16 | """Create logger and get the output path 17 | 18 | Args: 19 | root_output_path: 20 | root folder that all outputs are written under 21 | cfg: 22 | path of the yaml configuration file 23 | image_set: 24 | '+'-separated image set name(s), e.g. 'train+val' 25 | 26 | Return: 27 | logger 28 | 29 | final output folder path 30 | """ 31 | # set up logger 32 | if not os.path.exists(root_output_path): 33 | os.makedirs(root_output_path) 34 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 35 | 36 | cfg_name = os.path.basename(cfg).split('.')[0] 37 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 38 | if not os.path.exists(config_output_path): 39 | os.makedirs(config_output_path) 40 | 41 | image_sets = [iset for iset in image_set.split('+')] 42 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 43 | if not os.path.exists(final_output_path): 44 | os.makedirs(final_output_path) 45 | 46 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 47 | head = '%(asctime)-15s %(message)s' 48 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 49 | logger = logging.getLogger() 50 |
logger.setLevel(logging.INFO) 51 | 52 | return logger, final_output_path 53 | 54 | -------------------------------------------------------------------------------- /lib/utils/image_processing.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def resize(im, target_size, max_size): 16 | """ 17 | only resize input image to target size and return scale 18 | :param im: BGR image input by opencv 19 | :param target_size: one dimensional size (the short side) 20 | :param max_size: one dimensional max size (the long side) 21 | :return: resized im and the scale that was applied 22 | """ 23 | im_shape = im.shape 24 | im_size_min = np.min(im_shape[0:2]) 25 | im_size_max = np.max(im_shape[0:2]) 26 | im_scale = float(target_size) / float(im_size_min) 27 | # prevent bigger axis from being more than max_size: 28 | if np.round(im_scale * im_size_max) > max_size: 29 | im_scale = float(max_size) / float(im_size_max) 30 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 31 | return im, im_scale 32 | 33 | 34 | def transform(im, pixel_means, need_mean=False): 35 | """ 36 | transform into mxnet tensor 37 | subtract the pixel means and transpose to the correct format 38 | :param im: [height, width, channel] in BGR 39 | :param pixel_means: [[[R, G, B pixel means]]] 40 | :return: [batch, channel, height, width] 41 | """ 42 | assert False, "shouldn't reach here."
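# NOTE: the assert above makes the remainder of this helper unreachable; the
# module appears to be superseded by utils/image.py, which is what the rest
# of the code imports (see lib/segmentation/segmentation.py).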
43 | im = im.copy() 44 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] 45 | im = im.astype(float) 46 | if need_mean: 47 | im -= pixel_means 48 | im_tensor = im[np.newaxis, :] 49 | # put channel first 50 | channel_swap = (0, 3, 1, 2) 51 | im_tensor = im_tensor.transpose(channel_swap) 52 | return im_tensor 53 | 54 | 55 | def transform_inverse(im_tensor, pixel_means): 56 | """ 57 | transform from mxnet im_tensor to ordinary RGB image 58 | im_tensor is limited to one image 59 | :param im_tensor: [batch, channel, height, width] 60 | :param pixel_means: [[[R, G, B pixel means]]] 61 | :return: im [height, width, channel(RGB)] 62 | """ 63 | assert im_tensor.shape[0] == 1 64 | im_tensor = im_tensor.copy() 65 | # put channel back 66 | channel_swap = (0, 2, 3, 1) 67 | im_tensor = im_tensor.transpose(channel_swap) 68 | im = im_tensor[0] 69 | assert im.shape[2] == 3 70 | im += pixel_means 71 | im = im.astype(np.uint8) 72 | return im 73 | 74 | 75 | def tensor_vstack(tensor_list, pad=0): 76 | """ 77 | vertically stack tensors 78 | :param tensor_list: list of tensor to be stacked vertically 79 | :param pad: label to pad with 80 | :return: tensor with max shape 81 | """ 82 | ndim = len(tensor_list[0].shape) 83 | if ndim == 1: 84 | return np.hstack(tensor_list) 85 | dimensions = [0] 86 | for dim in range(1, ndim): 87 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 88 | for ind, tensor in enumerate(tensor_list): 89 | pad_shape = [(0, 0)] 90 | for dim in range(1, ndim): 91 | pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) 92 | tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) 93 | all_tensor = np.vstack(tensor_list) 94 | return all_tensor 95 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import numpy as np 12 | from dataset import * 13 | 14 | 15 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 16 | flip=False): 17 | """ load ground truth roidb """ 18 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 19 | roidb = imdb.gt_roidb() 20 | if flip: 21 | roidb = imdb.append_flipped_images(roidb) 22 | return roidb 23 | 24 | 25 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 26 | proposal='rpn', append_gt=True, flip=False): 27 | """ load proposal roidb (append_gt when training) """ 28 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 29 | 30 | gt_roidb = imdb.gt_roidb() 31 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb, append_gt) 32 | if flip: 33 | roidb = imdb.append_flipped_images(roidb) 34 | return roidb 35 | 36 | 37 | def merge_roidb(roidbs): 38 | """ roidb are list, concat them together """ 39 | roidb = roidbs[0] 40 | for r in roidbs[1:]: 41 | roidb.extend(r) 42 | return roidb 43 | 44 | 45 | def filter_roidb(roidb, config): 46 | """ remove roidb entries without usable rois """ 47 | 48 | def is_valid(entry): 49 | """ valid images have at least 1 fg or bg roi """ 50 | overlaps = entry['max_overlaps'] 51 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 52 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 53 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 54 | return valid 55 | 56 | num = len(roidb) 57 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 58 | num_after = len(filtered_roidb) 59 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 60 | 61 | return filtered_roidb 62 | 63 | 64 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 65 | flip=False, video=False): 66 | """ load ground truth segdb """ 67 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 68 | segdb = imdb.gt_segdb() 69 | if flip: 70 | segdb = imdb.append_flipped_images_for_segmentation(segdb, video=video) 71 | return segdb 72 | 73 | 74 | def merge_segdb(segdbs): 75 | """ segdb are list, concat them together """ 76 | segdb = segdbs[0] 77 | for r in segdbs[1:]: 78 | segdb.extend(r) 79 | return segdb 80 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | 13 | 14 | def load_checkpoint(prefix, epoch): 15 | """ 16 | Load model checkpoint from file. 17 | :param prefix: Prefix of model name. 18 | :param epoch: Epoch number of model we would like to load. 19 | :return: (arg_params, aux_params) 20 | arg_params : dict of str to NDArray 21 | Model parameter, dict of name to NDArray of net's weights. 22 | aux_params : dict of str to NDArray 23 | Model parameter, dict of name to NDArray of net's auxiliary states. 24 | """ 25 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 26 | arg_params = {} 27 | aux_params = {} 28 | for k, v in save_dict.items(): 29 | tp, name = k.split(':', 1) 30 | if tp == 'arg': 31 | arg_params[name] = v 32 | if tp == 'aux': 33 | aux_params[name] = v 34 | return arg_params, aux_params 35 | 36 | 37 | def convert_context(params, ctx): 38 | """ 39 | :param params: dict of str to NDArray 40 | :param ctx: the context to convert to 41 | :return: dict of str of NDArray with context ctx 42 | """ 43 | new_params = dict() 44 | for k, v in params.items(): 45 | new_params[k] = v.as_in_context(ctx) 46 | return new_params 47 | 48 | 49 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 50 | """ 51 | wrapper for load checkpoint 52 | :param prefix: Prefix of model name. 
53 | :param epoch: Epoch number of model we would like to load. 54 | :param convert: reference model should be converted to GPU NDArray first 55 | :param ctx: if convert then ctx must be designated. 56 | :param process: model should drop any test 57 | :return: (arg_params, aux_params) 58 | """ 59 | arg_params, aux_params = load_checkpoint(prefix, epoch) 60 | if convert: 61 | if ctx is None: 62 | ctx = mx.cpu() 63 | arg_params = convert_context(arg_params, ctx) 64 | aux_params = convert_context(aux_params, ctx) 65 | if process: 66 | tests = [k for k in arg_params.keys() if '_test' in k] 67 | for test in tests: 68 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 69 | return arg_params, aux_params 70 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | 12 | import logging 13 | from mxnet.lr_scheduler import LRScheduler 14 | 15 | class WarmupMultiFactorScheduler(LRScheduler): 16 | """Reduce learning rate in factor at steps specified in a list 17 | 18 | Assume the weight has been updated by n times, then the learning rate will 19 | be 20 | 21 | base_lr * factor^(sum((step/n)<=1)) # step is an array 22 | 23 | Parameters 24 | ---------- 25 | step: list of int 26 | schedule learning rate after n updates 27 | factor: float 28 | the factor for reducing the learning rate 29 | """ 30 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 31 | super(WarmupMultiFactorScheduler, self).__init__() 32 | assert isinstance(step, list) and len(step) >= 1 33 | for i, _step in enumerate(step): 34 | if i != 0 and step[i] <= step[i-1]: 35 | raise ValueError("Schedule step must be an increasing integer list") 36 | if _step < 1: 37 | raise ValueError("Schedule step must be greater or equal than 1 round") 38 | if factor > 1.0: 39 | raise ValueError("Factor must be no more than 1 to make lr reduce") 40 | self.step = step 41 | self.cur_step_ind = 0 42 | self.factor = factor 43 | self.count = 0 44 | self.warmup = warmup 45 | self.warmup_lr = warmup_lr 46 | self.warmup_step = warmup_step 47 | 48 | def __call__(self, num_update): 49 | """ 50 | Call to schedule current learning rate 51 | 52 | Parameters 53 | ---------- 54 | num_update: int 55 | the maximal number of updates applied to a weight. 
56 | """ 57 | 58 | # NOTE: use while rather than if (for continuing training via load_epoch) 59 | if self.warmup and num_update < self.warmup_step: 60 | return self.warmup_lr 61 | while self.cur_step_ind <= len(self.step)-1: 62 | if num_update > self.step[self.cur_step_ind]: 63 | self.count = self.step[self.cur_step_ind] 64 | self.cur_step_ind += 1 65 | self.base_lr *= self.factor 66 | logging.info("Update[%d]: Change learning rate to %0.5e", 67 | num_update, self.base_lr) 68 | else: 69 | return self.base_lr 70 | return self.base_lr 71 | -------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | 11 | def segToMask( S, h, w ): 12 | """ 13 | Convert polygon segmentation to binary mask. 14 | :param S (float array) : polygon segmentation mask 15 | :param h (int) : target mask height 16 | :param w (int) : target mask width 17 | :return: M (bool 2D array) : binary mask 18 | """ 19 | M = np.zeros((h,w), dtype=np.bool) 20 | for s in S: 21 | N = len(s) 22 | rr, cc = polygon(np.array(s[1:N:2]).clip(max=h-1), \ 23 | np.array(s[0:N:2]).clip(max=w-1)) # (y, x) 24 | M[rr, cc] = 1 25 | return M 26 | 27 | 28 | def decodeMask(R): 29 | """ 30 | Decode binary mask M encoded via run-length encoding. 31 | :param R (object RLE) : run-length encoding of binary mask 32 | :return: M (bool 2D array) : decoded binary mask 33 | """ 34 | N = len(R['counts']) 35 | M = np.zeros( (R['size'][0]*R['size'][1], )) 36 | n = 0 37 | val = 1 38 | for pos in range(N): 39 | val = not val 40 | for c in range(R['counts'][pos]): 41 | R['counts'][pos] 42 | M[n] = val 43 | n += 1 44 | return M.reshape((R['size']), order='F') 45 | 46 | def mask_coco2voc(coco_masks, im_height, im_width): 47 | voc_masks = np.zeros((len(coco_masks), im_height, im_width)) 48 | for i, ann in enumerate(coco_masks): 49 | if type(ann) == list: 50 | # polygon 51 | m = segToMask(ann, im_height, im_width) 52 | else: 53 | # rle 54 | m = decodeMask(ann) 55 | voc_masks[i,:,:]=m; 56 | return voc_masks 57 | -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | import cv2 11 | from utils.tictoc import tic, toc 12 | from dataset.pycocotools.mask import encode as encodeMask_c 13 | 14 | def encodeMask(M): 15 | """ 16 | Encode binary mask M using run-length encoding. 
-------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | 11 | def segToMask( S, h, w ): 12 | """ 13 | Convert polygon segmentation to binary mask. 14 | :param S (float array) : polygon segmentation mask 15 | :param h (int) : target mask height 16 | :param w (int) : target mask width 17 | :return: M (bool 2D array) : binary mask 18 | """ 19 | M = np.zeros((h,w), dtype=np.bool) 20 | for s in S: 21 | N = len(s) 22 | rr, cc = polygon(np.array(s[1:N:2]).clip(max=h-1), \ 23 | np.array(s[0:N:2]).clip(max=w-1)) # (y, x) 24 | M[rr, cc] = 1 25 | return M 26 | 27 | 28 | def decodeMask(R): 29 | """ 30 | Decode binary mask M encoded via run-length encoding. 31 | :param R (object RLE) : run-length encoding of binary mask 32 | :return: M (bool 2D array) : decoded binary mask 33 | """ 34 | N = len(R['counts']) 35 | M = np.zeros( (R['size'][0]*R['size'][1], )) 36 | n = 0 37 | val = 1 38 | for pos in range(N): 39 | val = not val 40 | for c in range(R['counts'][pos]): 41 | 42 | M[n] = val 43 | n += 1 44 | return M.reshape((R['size']), order='F') 45 | 46 | def mask_coco2voc(coco_masks, im_height, im_width): 47 | voc_masks = np.zeros((len(coco_masks), im_height, im_width)) 48 | for i, ann in enumerate(coco_masks): 49 | if type(ann) == list: 50 | # polygon 51 | m = segToMask(ann, im_height, im_width) 52 | else: 53 | # rle 54 | m = decodeMask(ann) 55 | voc_masks[i, :, :] = m 56 | return voc_masks 57 | -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | import cv2 11 | from utils.tictoc import tic, toc 12 | from dataset.pycocotools.mask import encode as encodeMask_c 13 | 14 | def encodeMask(M): 15 | """ 16 | Encode binary mask M using run-length encoding. 17 | :param M (bool 2D array) : binary mask to encode 18 | :return: R (object RLE) : run-length encoding of binary mask 19 | """ 20 | [h, w] = M.shape 21 | M = M.flatten(order='F') 22 | N = len(M) 23 | counts_list = [] 24 | pos = 0 25 | # counts 26 | counts_list.append(1) 27 | diffs = np.logical_xor(M[0:N - 1], M[1:N]) 28 | for diff in diffs: 29 | if diff: 30 | pos += 1 31 | counts_list.append(1) 32 | else: 33 | counts_list[pos] += 1 34 | # if array starts from 1. start with 0 counts for 0 35 | if M[0] == 1: 36 | counts_list = [0] + counts_list 37 | return {'size': [h, w], 38 | 'counts': counts_list, 39 | } 40 | 41 | def mask_voc2coco(voc_masks, voc_boxes, im_height, im_width, binary_thresh = 0.4): 42 | num_pred = len(voc_masks) 43 | assert(num_pred==voc_boxes.shape[0]) 44 | mask_img = np.zeros((im_height, im_width, num_pred), dtype=np.uint8, order='F') 45 | for i in xrange(num_pred): 46 | pred_box = np.round(voc_boxes[i, :4]).astype(int) 47 | pred_mask = voc_masks[i] 48 | pred_mask = cv2.resize(pred_mask.astype(np.float32), (pred_box[2] - pred_box[0] + 1, pred_box[3] - pred_box[1] + 1)) 49 | mask_img[pred_box[1]:pred_box[3]+1, pred_box[0]:pred_box[2]+1, i] = pred_mask >= binary_thresh 50 | coco_mask = encodeMask_c(mask_img) 51 | return coco_mask 52 | -------------------------------------------------------------------------------- /lib/utils/network_visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | from deeplab import _init_paths 13 | # from deeplab.symbols.resnet_v1_101_deeplab_dcn_duc import resnet_v1_101_deeplab_dcn_duc 14 | # from deeplab.symbols.duc_hdc_symbol.network_duc_hdc import get_symbol_duc_hdc 15 | from deeplab.symbols.densenet_bc_deeplab_base import densenet_bc_deeplab_base 16 | 17 | def plot_network(symbol, input_data_shape): 18 | t = mx.viz.plot_network(symbol, shape={'data' : input_data_shape}) 19 | t.render() 20 | 21 | 22 | if __name__ == '__main__': 23 | # For resnet-dcn 24 | # resnet_dcn = resnet_v1_101_deeplab_dcn_duc() 25 | # symbol = resnet_dcn.get_train_duc_symbol(19) 26 | # input_data_shape = (1, 3, 1024, 2048) 27 | # plot_network(symbol, input_data_shape) 28 | 29 | # symbol_duc = get_symbol_duc_hdc(19, 16) 30 | # plot_network(symbol_duc, input_data_shape) 31 | 32 | # For DenseNet 33 | # depth = 121 34 | 35 | # if depth == 121: 36 | # units = [6, 12, 24, 16] 37 | # elif depth == 169: 38 | # units = [6, 12, 32, 32] 39 | # elif depth == 201: 40 | # units = [6, 12, 48, 32] 41 | # elif depth == 161: 42 | # units = [6, 12, 36, 24] 43 | # else: 44 | # raise ValueError("no experiments done on depth {}, you can do it yourself".format(depth)) 45 | 46 | # reduction = 0.5 47 | 48 | # symbol_densenet = DenseNet(units=units, num_stage=4, growth_rate=48 if depth==161 else 32, 49 | # num_class=1000, data_type='imagenet', reduction=reduction, drop_out=0, bottle_neck=True, 50 | # bn_mom=0.9, workspace=512) 51 | densenets = densenet_bc_deeplab_base() 52 | input_data_shape = (1, 3, 1024, 2048) 53 | symbol_densenet = densenets.get_train_symbol(19) 54 | plot_network(symbol_densenet,
input_data_shape) 55 | -------------------------------------------------------------------------------- /lib/utils/roidb.py: -------------------------------------------------------------------------------- 1 | """ 2 | roidb 3 | basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 4 | extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] 5 | """ 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | from bbox.bbox_regression import compute_bbox_regression_targets 11 | 12 | 13 | def prepare_roidb(imdb, roidb, cfg): 14 | """ 15 | add image path, max_classes, max_overlaps to roidb 16 | :param imdb: image database, provide path 17 | :param roidb: roidb 18 | :return: None 19 | """ 20 | print 'prepare roidb' 21 | for i in range(len(roidb)): # image_index 22 | roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) 23 | if cfg.TRAIN.ASPECT_GROUPING: 24 | size = cv2.imread(roidb[i]['image']).shape 25 | roidb[i]['height'] = size[0] 26 | roidb[i]['width'] = size[1] 27 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 28 | max_overlaps = gt_overlaps.max(axis=1) 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | roidb[i]['max_overlaps'] = max_overlaps 31 | roidb[i]['max_classes'] = max_classes 32 | 33 | # background roi => background class 34 | zero_indexes = np.where(max_overlaps == 0)[0] 35 | assert all(max_classes[zero_indexes] == 0) 36 | # foreground roi => foreground class 37 | nonzero_indexes = np.where(max_overlaps > 0)[0] 38 | assert all(max_classes[nonzero_indexes] != 0) 39 | -------------------------------------------------------------------------------- /lib/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 
14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /lib/utils/show_boxes.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li, Haocheng Zhang 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | from random import random as rand 10 | def show_boxes(im, dets, classes, scale = 1.0): 11 | plt.cla() 12 | plt.axis("off") 13 | plt.imshow(im) 14 | for cls_idx, cls_name in enumerate(classes): 15 | cls_dets = dets[cls_idx] 16 | for det in cls_dets: 17 | bbox = det[:4] * scale 18 | color = (rand(), rand(), rand()) 19 | rect = plt.Rectangle((bbox[0], bbox[1]), 20 | bbox[2] - bbox[0], 21 | bbox[3] - bbox[1], fill=False, 22 | edgecolor=color, linewidth=2.5) 23 | plt.gca().add_patch(rect) 24 | 25 | if cls_dets.shape[1] == 5: 26 | score = det[-1] 27 | plt.gca().text(bbox[0], bbox[1], 28 | '{:s} {:.3f}'.format(cls_name, score), 29 | bbox=dict(facecolor=color, alpha=0.5), fontsize=9, color='white') 30 | plt.show() 31 | return im 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/show_masks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import random 4 | import cv2 5 | 6 | def show_masks(im, dets, msks, show = True, thresh = 1e-3, scale = 1.0): 7 | plt.cla() 8 | plt.imshow(im) 9 | for det, msk in zip(dets, msks): 10 | color = (random.random(), random.random(), random.random()) # generate a random color 11 | bbox = det[:4] * scale 12 | cod = np.zeros(4).astype(int) 13 | cod[0] = int(bbox[0]) 14 | cod[1] = int(bbox[1]) 15 | cod[2] = int(bbox[2]) 16 | cod[3] = int(bbox[3]) 17 | if im[cod[0]:cod[2], cod[1]:cod[3], 0].size > 0: 18 | msk = cv2.resize(msk, im[cod[1]:cod[3], cod[0]:cod[2], 0].T.shape) 19 | bimsk = msk > thresh 20 | bimsk = bimsk.astype(int) 21 | bimsk = np.repeat(bimsk[:, :, np.newaxis], 3, axis=2) 22 | mskd = im[cod[1]:cod[3], cod[0]:cod[2], :] * bimsk 23 | clmsk = np.ones(bimsk.shape) * bimsk 24 | clmsk[:, :, 0] = clmsk[:, :, 0] * color[0] * 256; 25 | clmsk[:, :, 1] = clmsk[:, :, 1] * color[1] * 256; 26 | clmsk[:, :, 2] = clmsk[:, :, 2] * color[2] * 256; 27 | im[cod[1]:cod[3], cod[0]:cod[2], :] = im[cod[1]:cod[3], cod[0]:cod[2], :] + 0.8 * clmsk - 0.8 * mskd 28 | plt.imshow(im) 29 | if(show): 30 | plt.show() 31 | return im 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/show_offset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | def show_boxes_simple(bbox, color='r', lw=2): 12 | rect = 
plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], 13 | bbox[3] - bbox[1], fill=False, edgecolor=color, linewidth=lw) 14 | plt.gca().add_patch(rect) 15 | 16 | def kernel_inv_map(vis_attr, target_point, map_h, map_w): 17 | pos_shift = [vis_attr['dilation'] * 0 - vis_attr['pad'], 18 | vis_attr['dilation'] * 1 - vis_attr['pad'], 19 | vis_attr['dilation'] * 2 - vis_attr['pad']] 20 | source_point = [] 21 | for idx in range(vis_attr['filter_size']**2): 22 | cur_source_point = np.array([target_point[0] + pos_shift[idx / 3], 23 | target_point[1] + pos_shift[idx % 3]]) 24 | if cur_source_point[0] < 0 or cur_source_point[1] < 0 \ 25 | or cur_source_point[0] > map_h - 1 or cur_source_point[1] > map_w - 1: 26 | continue 27 | source_point.append(cur_source_point.astype('f')) 28 | return source_point 29 | 30 | def offset_inv_map(source_points, offset): 31 | for idx, _ in enumerate(source_points): 32 | source_points[idx][0] += offset[2*idx] 33 | source_points[idx][1] += offset[2*idx + 1] 34 | return source_points 35 | 36 | def get_bottom_position(vis_attr, top_points, all_offset): 37 | map_h = all_offset[0].shape[2] 38 | map_w = all_offset[0].shape[3] 39 | 40 | for level in range(vis_attr['plot_level']): 41 | source_points = [] 42 | for idx, cur_top_point in enumerate(top_points): 43 | cur_top_point = np.round(cur_top_point) 44 | if cur_top_point[0] < 0 or cur_top_point[1] < 0 \ 45 | or cur_top_point[0] > map_h-1 or cur_top_point[1] > map_w-1: 46 | continue 47 | cur_source_point = kernel_inv_map(vis_attr, cur_top_point, map_h, map_w) 48 | cur_offset = np.squeeze(all_offset[level][:, :, int(cur_top_point[0]), int(cur_top_point[1])]) 49 | cur_source_point = offset_inv_map(cur_source_point, cur_offset) 50 | source_points = source_points + cur_source_point 51 | top_points = source_points 52 | return source_points 53 | 54 | def plot_according_to_point(vis_attr, im, source_points, map_h, map_w, color=[255,0,0]): 55 | plot_area = vis_attr['plot_area'] 56 | for idx, cur_source_point in enumerate(source_points): 57 | y = np.round((cur_source_point[0] + 0.5) * im.shape[0] / map_h).astype('i') 58 | x = np.round((cur_source_point[1] + 0.5) * im.shape[1] / map_w).astype('i') 59 | 60 | if x < 0 or y < 0 or x > im.shape[1]-1 or y > im.shape[0]-1: 61 | continue 62 | y = min(y, im.shape[0] - vis_attr['plot_area'] - 1) 63 | x = min(x, im.shape[1] - vis_attr['plot_area'] - 1) 64 | y = max(y, vis_attr['plot_area']) 65 | x = max(x, vis_attr['plot_area']) 66 | im[y-plot_area:y+plot_area+1, x-plot_area:x+plot_area+1, :] = np.tile( 67 | np.reshape(color, (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1) 68 | ) 69 | return im 70 | 71 | 72 | 73 | def show_dpsroi_offset(im, boxes, offset, classes, trans_std=0.1): 74 | plt.cla() 75 | for idx, bbox in enumerate(boxes): 76 | plt.figure(idx+1) 77 | plt.axis("off") 78 | plt.imshow(im) 79 | 80 | offset_w = np.squeeze(offset[idx, classes[idx]*2, :, :]) * trans_std 81 | offset_h = np.squeeze(offset[idx, classes[idx]*2+1, :, :]) * trans_std 82 | x1 = int(bbox[0]) 83 | y1 = int(bbox[1]) 84 | x2 = int(bbox[2]) 85 | y2 = int(bbox[3]) 86 | roi_width = x2-x1+1 87 | roi_height = y2-y1+1 88 | part_size = offset_w.shape[0] 89 | bin_size_w = roi_width / part_size 90 | bin_size_h = roi_height / part_size 91 | show_boxes_simple(bbox, color='b') 92 | for ih in range(part_size): 93 | for iw in range(part_size): 94 | sub_box = np.array([x1+iw*bin_size_w, y1+ih*bin_size_h, 95 | x1+(iw+1)*bin_size_w, y1+(ih+1)*bin_size_h]) 96 | sub_offset = offset_h[ih, iw] * np.array([0, 1, 0, 1]) * roi_height \ 97 | +
offset_w[ih, iw] * np.array([1, 0, 1, 0]) * roi_width 98 | sub_box = sub_box + sub_offset 99 | show_boxes_simple(sub_box) 100 | plt.show() 101 | 102 | def show_dconv_offset(im, all_offset, step=[2, 2], filter_size=3, 103 | dilation=2, pad=2, plot_area=2, plot_level=3): 104 | vis_attr = {'filter_size': filter_size, 'dilation': dilation, 'pad': pad, 105 | 'plot_area': plot_area, 'plot_level': plot_level} 106 | 107 | map_h = all_offset[0].shape[2] 108 | map_w = all_offset[0].shape[3] 109 | 110 | step_h = step[0] 111 | step_w = step[1] 112 | start_h = int(np.round(step_h / 2)) 113 | start_w = int(np.round(step_w / 2)) 114 | 115 | plt.figure() 116 | for im_h in range(start_h, map_h, step_h): 117 | for im_w in range(start_w, map_w, step_w): 118 | target_point = np.array([im_h, im_w]) 119 | source_y = int(np.round(target_point[0] * im.shape[0] / map_h)) 120 | source_x = int(np.round(target_point[1] * im.shape[1] / map_w)) 121 | if source_y < plot_area or source_x < plot_area \ 122 | or source_y >= im.shape[0] - plot_area or source_x >= im.shape[1] - plot_area: 123 | continue 124 | 125 | cur_im = np.copy(im) 126 | source_points = get_bottom_position(vis_attr, [target_point], all_offset) 127 | cur_im = plot_according_to_point(vis_attr, cur_im, source_points, map_h, map_w) 128 | cur_im[source_y-plot_area:source_y+plot_area+1, source_x-plot_area:source_x+plot_area+1, :] = \ 129 | np.tile(np.reshape([0, 255, 0], (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1)) 130 | 131 | 132 | plt.axis("off") 133 | plt.imshow(cur_im) 134 | plt.show(block=False) 135 | plt.pause(0.01) 136 | plt.clf() 137 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | class Symbol: 10 | def __init__(self): 11 | self.arg_shape_dict = None 12 | self.out_shape_dict = None 13 | self.aux_shape_dict = None 14 | self.sym = None 15 | 16 | @property 17 | def symbol(self): 18 | return self.sym 19 | 20 | def get_symbol(self, cfg, is_train=True): 21 | """ 22 | return a generated symbol; it also needs to be assigned to self.sym 23 | """ 24 | raise NotImplementedError() 25 | 26 | def init_weights(self, cfg, arg_params, aux_params): 27 | raise NotImplementedError() 28 | 29 | def get_msra_std(self, shape): 30 | fan_in = float(shape[1]) 31 | if len(shape) > 2: 32 | fan_in *= np.prod(shape[2:]) 33 | print(np.sqrt(2 / fan_in)) 34 | return np.sqrt(2 / fan_in) 35 | 36 | def infer_shape(self, data_shape_dict): 37 | # infer shape 38 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 39 | self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 40 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 41 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 42 | 43 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 44 | for k in self.sym.list_arguments(): 45 | if k in data_shape_dict or (False if is_train else 'label' in k): 46 | continue 47 | assert k in arg_params, k + ' not initialized' 48 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 49 | 'shape inconsistent for ' + k + ' inferred ' +
str(self.arg_shape_dict[k]) + ' provided ' + str( 50 | arg_params[k].shape) 51 | for k in self.sym.list_auxiliary_states(): 52 | assert k in aux_params, k + ' not initialized' 53 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 54 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 55 | aux_params[k].shape) 56 | -------------------------------------------------------------------------------- /lib/utils/tictoc.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def tic(): 4 | import time 5 | global startTime_for_tictoc 6 | startTime_for_tictoc = time.time() 7 | return startTime_for_tictoc 8 | 9 | def toc(): 10 | if 'startTime_for_tictoc' in globals(): 11 | endTime = time.time() 12 | return endTime - startTime_for_tictoc 13 | else: 14 | return None -------------------------------------------------------------------------------- /model/pretrained_model/resnet_v1_101-0000.params: -------------------------------------------------------------------------------- 1 | /home/PublicModel/mxnet/pretrained_model/resnet_v1_101-0000.params -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nose 2 | numpy==1.14.0 3 | nose-timer 4 | requests>=2.20.0 5 | Pillow 6 | easydict 7 | pyyaml 8 | sacred 9 | visdom 10 | Cython 11 | matplotlib 12 | scikit-image 13 | tqdm 14 | mxnet-cu90 15 | opencv-python==3.4.1.15 16 | --------------------------------------------------------------------------------
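Note: the pins describe a Python 2 / CUDA 9.0 environment (the code relies on xrange and print statements, and mxnet-cu90 targets CUDA 9.0). On a matching machine the usual sequence is `pip install -r requirements.txt` followed by running init.sh, which presumably builds the Cython/CUDA extensions under lib/ before training.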