├── .gitignore ├── DeeplabCityscapes.tgn ├── DockerConfigs ├── MXNet │ ├── Dockerfile.python.gpu │ └── install │ │ ├── cpp.sh │ │ ├── python.sh │ │ └── source.list └── README.md ├── LICENSE ├── README.md ├── data └── cityscapes ├── deeplab ├── __init__.py ├── _init_paths.py ├── config │ ├── __init__.py │ ├── config.py │ └── dff_config.py ├── core │ ├── DataParallelExecutorGroup.py │ ├── __init__.py │ ├── callback.py │ ├── loader.py │ ├── metric.py │ ├── module.py │ └── tester.py ├── demo.py ├── dff_test.py ├── dff_train.py ├── function │ ├── __init__.py │ ├── reeval.py │ └── test_deeplab.py ├── symbols │ ├── __init__.py │ ├── deeplabv3 │ │ ├── aspp_temp.py │ │ ├── xception_65_deeplab_v3_plus.py │ │ └── xception_temp.py │ ├── print_summary.py │ ├── resnet_v1_101_deeplab.py │ ├── resnet_v1_101_deeplab_dcn.py │ ├── resnet_v1_101_deeplab_dcn_duc.py │ ├── resnet_v1_101_deeplab_video.py │ └── resnet_v1_101_deeplab_video_dcn.py ├── test.py └── train.py ├── experiments ├── deeplab │ ├── cfgs │ │ ├── deeplab_cityscapes_demo.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_capsule.yaml │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml │ │ ├── deeplab_resnet_v1_101_voc12_segmentation_base.yaml │ │ └── deeplab_resnet_v1_101_voc12_segmentation_dcn.yaml │ ├── deeplab_test.py │ └── deeplab_train_test.py ├── deeplab_dff │ ├── cfgs │ │ ├── deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml │ │ └── deeplab_resnet_v1_101_cityscapes_segmentation_video_duc.yaml │ ├── deeplab_dff_test.py │ └── deeplab_dff_train.py ├── faster_rcnn │ ├── cfgs │ │ ├── resnet_v1_101_coco_trainval_rcnn_dcn_end2end.yaml │ │ ├── resnet_v1_101_coco_trainval_rcnn_end2end.yaml │ │ ├── resnet_v1_101_voc0712_rcnn_dcn_end2end.yaml │ │ └── resnet_v1_101_voc0712_rcnn_end2end.yaml │ ├── rcnn_end2end_train_test.py │ ├── rcnn_test.py │ └── rcnn_train_test.py └── rfcn │ ├── cfgs │ ├── deform_conv_demo.yaml │ ├── deform_psroi_demo.yaml │ ├── resnet_v1_101_coco_trainval_rfcn_dcn_end2end_ohem.yaml │ ├── resnet_v1_101_coco_trainval_rfcn_end2end_ohem.yaml │ ├── resnet_v1_101_voc0712_rfcn_dcn_end2end_ohem.yaml │ ├── resnet_v1_101_voc0712_rfcn_end2end_ohem.yaml │ └── rfcn_coco_demo.yaml │ ├── rfcn_alternate_train_test.py │ ├── rfcn_end2end_train_test.py │ ├── rfcn_test.py │ └── rfcn_train_test.py ├── init.sh ├── lib ├── Makefile ├── __init__.py ├── bbox │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── setup_linux.py │ └── setup_windows.py ├── dataset │ ├── __init__.py │ ├── cityscape.py │ ├── cityscape_video.py │ ├── coco.py │ ├── ds_utils.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── pascal_voc_eval.py │ └── pycocotools │ │ ├── .gitignore │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── mask.py │ │ ├── maskApi.c │ │ ├── maskApi.h │ │ ├── setup_linux.py │ │ └── setup_windows.py ├── logger │ ├── __init__.py │ ├── logger.py │ ├── readme.md │ └── visdomlogger.py ├── mask │ ├── __init__.py │ └── mask_transform.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ ├── setup_linux.py │ ├── setup_windows.py │ └── setup_windows_cuda.py ├── rpn │ ├── __init__.py │ ├── generate_anchor.py │ └── rpn.py ├── segmentation │ ├── __init__.py │ └── segmentation.py └── utils │ ├── PrefetchingIter.py │ ├── __init__.py │ ├── combine_model.py │ ├── create_logger.py 
│ ├── image.py │ ├── image_processing.py │ ├── load_data.py │ ├── load_model.py │ ├── lr_scheduler.py │ ├── mask_coco2voc.py │ ├── mask_voc2coco.py │ ├── network_visualization.py │ ├── roidb.py │ ├── save_model.py │ ├── show_boxes.py │ ├── show_masks.py │ ├── show_offset.py │ ├── symbol.py │ └── tictoc.py ├── model └── pretrained_model │ └── resnet_v1_101-0000.params └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .vscode/ 3 | model/ 4 | .mxnet_0.12/ 5 | output/ 6 | data/ 7 | 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | Deformable-origin/ 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | # lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /DockerConfigs/MXNet/Dockerfile.python.gpu: -------------------------------------------------------------------------------- 1 | # -*- mode: dockerfile -*- 2 | # dockerfile to build libmxnet.so on GPU 3 | # Use cuda 9.0: mxnet-cu90 and the xenial apt sources below require it 4 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 5 | # FROM nvidia/cuda:latest 6 | MAINTAINER Songyang Zhang 7 | 8 | COPY install/cpp.sh install/ 9 | RUN chmod +x install/cpp.sh 10 | RUN install/cpp.sh 11 | 12 | # ENV BUILD_OPTS "USE_CUDA=1 USE_CUDA_PATH=/usr/local/cuda USE_CUDNN=1" 13 | # RUN git clone --recursive https://github.com/dmlc/mxnet && cd mxnet && \ 14 | # make -j$(nproc) $BUILD_OPTS 15 | 16 | # OpenCV 17 | RUN apt-get update && \ 18 | apt-get install -y \ 19 | build-essential \ 20 | cmake \ 21 | git \ 22 | wget \ 23 | unzip \ 24 | yasm \ 25 | pkg-config \ 26 | libswscale-dev \ 27 | libtbb2 \ 28 | libtbb-dev \ 29 | libjpeg-dev \ 30 | libpng-dev \ 31 | libtiff-dev \ 32 | libjasper-dev \ 33 | libavformat-dev \ 34 | libpq-dev 35 | 36 | WORKDIR / 37 | ENV OPENCV_VERSION="3.4.1" 38 | RUN wget https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \ 39 | && unzip ${OPENCV_VERSION}.zip \ 40 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary \ 41 | && cd /opencv-${OPENCV_VERSION}/cmake_binary \ 42 | && 
cmake -DBUILD_TIFF=ON \ 43 | -DBUILD_opencv_java=OFF \ 44 | -DWITH_CUDA=OFF \ 45 | -DENABLE_AVX=ON \ 46 | -DWITH_OPENGL=ON \ 47 | -DWITH_OPENCL=ON \ 48 | -DWITH_IPP=ON \ 49 | -DWITH_TBB=ON \ 50 | -DWITH_EIGEN=ON \ 51 | -DWITH_V4L=ON \ 52 | -DBUILD_TESTS=OFF \ 53 | -DBUILD_PERF_TESTS=OFF \ 54 | -DCMAKE_BUILD_TYPE=RELEASE \ 55 | -DCMAKE_INSTALL_PREFIX=$(python2.7 -c "import sys; print(sys.prefix)") \ 56 | -DPYTHON_EXECUTABLE=$(which python2.7) \ 57 | -DPYTHON_INCLUDE_DIR=$(python2.7 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 58 | -DPYTHON_PACKAGES_PATH=$(python2.7 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") .. \ 59 | && make install -j $(nproc) \ 60 | && rm /${OPENCV_VERSION}.zip \ 61 | && rm -r /opencv-${OPENCV_VERSION} 62 | 63 | # -*- mode: dockerfile -*- 64 | # part of the dockerfile to install the python binding 65 | 66 | COPY install/python.sh install/ 67 | RUN chmod +x install/python.sh 68 | RUN install/python.sh 69 | 70 | RUN pip2 install nose numpy==1.14.0 nose-timer requests==2.18.4 Pillow easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 -i https://pypi.tuna.tsinghua.edu.cn/simple 71 | RUN pip3 install nose pylint numpy==1.14.0 nose-timer requests==2.18.4 Pillow easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 -i https://pypi.tuna.tsinghua.edu.cn/simple 72 | 73 | RUN pip2 install opencv-python==3.4.1.15 -i https://pypi.tuna.tsinghua.edu.cn/simple 74 | RUN pip3 install opencv-python==3.4.1.15 -i https://pypi.tuna.tsinghua.edu.cn/simple 75 | 76 | RUN apt-get -y install python-tk 77 | RUN apt-get -y install python3-tk 78 | 79 | ENV PYTHONPATH=/mxnet/python 80 | CMD sh -c 'ln -s /dev/null /dev/raw1394'; bash 81 |
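Once the image is built, a quick way to confirm that the Python bindings picked up the intended OpenCV 3.4.1 (a sketch, not part of the repo; relevant because of the 2.x/3.x mismatch noted in the FAQ of the top-level README):
```bash
python2 -c "import cv2; print(cv2.__version__)"   # expected: 3.4.1
python3 -c "import cv2; print(cv2.__version__)"
```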
-------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/cpp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # libraries for building mxnet c++ core on ubuntu 21 | 22 | cp source.list /etc/apt/sources.list 23 | 24 | apt-get update && apt-get install -y \ 25 | build-essential git libatlas-base-dev libopencv-dev python-opencv \ 26 | libcurl4-openssl-dev libgtest-dev cmake wget unzip 27 | 28 | cd /usr/src/gtest && cmake CMakeLists.txt && make && cp *.a /usr/lib -------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/python.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # install libraries for mxnet's python package on ubuntu 21 | 22 | apt-get update && apt-get install -y python-dev python3-dev 23 | 24 | # the version of pip shipped with ubuntu may be too old, so install a recent version here 25 | cd /tmp && wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && python2 get-pip.py 26 | 27 | pip2 install nose pylint numpy nose-timer requests Pillow 28 | pip3 install nose pylint numpy nose-timer requests Pillow 29 | 30 | # For segmentation 31 | pip2 install easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 32 | pip3 install easydict pyyaml sacred visdom Cython matplotlib scikit-image tqdm mxnet-cu90 33 | 34 |
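A similar sanity check for the MXNet install performed above (a sketch; `mxnet-cu90` expects the CUDA 9.0 runtime, so run it inside the GPU container):
```bash
python2 -c "import mxnet; print(mxnet.__version__)"
python3 -c "import mxnet; print(mxnet.__version__)"
```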
-------------------------------------------------------------------------------- /DockerConfigs/MXNet/install/source.list: -------------------------------------------------------------------------------- 1 | # deb cdrom:[Ubuntu 16.04 LTS _Xenial Xerus_ - Release amd64 (20160420.1)]/ xenial main restricted 2 | deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties 3 | deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted 4 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties 5 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted 6 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties 7 | deb http://mirrors.aliyun.com/ubuntu/ xenial universe 8 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe 9 | deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse 10 | deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse 11 | deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse 12 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties 13 | deb http://archive.canonical.com/ubuntu xenial partner 14 | deb-src http://archive.canonical.com/ubuntu xenial partner 15 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted 16 | deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties 17 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe 18 | deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse -------------------------------------------------------------------------------- /DockerConfigs/README.md: -------------------------------------------------------------------------------- 1 | # Build a Docker image to run Deep Feature Flow 2 | Build the image using our Dockerfile configuration: 3 | ```bash 4 | cd ./MXNet 5 | docker build -t mxnet_dff/python:gpu -f ./Dockerfile.python.gpu . 6 | ``` 7 | - `-t` sets the image tag 8 | - `-f` points to the Dockerfile 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Songyang Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Feature Flow for Video Semantic Segmentation 2 | Based on Deeplab V2 3 | 4 | ## 1. Setup environment 5 | - If you use our dockerfile, you can run the code easily (see the container example below). 6 | - If you want to set up your own env, please follow these steps: 7 | - We only support `python2.7` for now 8 | - Install tk: `sudo apt-get -y install python-tk` 9 | - Install OpenCV 3.4.1 10 | - Install the needed python packages with `pip install -r requirements.txt` 11 | - If you are in mainland China, you can use this mirror to speed up the download: 12 | `pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple` 13 | - Then run `sh init.sh` to build the lib for faster-rcnn 14 | Because we reuse code from Deformable ConvNets and the dataloader has some dependencies on faster-rcnn, you need to build the lib first.
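If you built the Docker image from `DockerConfigs`, one possible way to start a GPU container with this repo mounted (a sketch — `--runtime=nvidia` assumes nvidia-docker2 is installed, and the mount path and working directory are assumptions; adjust them to your setup):
```bash
docker run --runtime=nvidia -it \
    -v $(pwd):/workspace -w /workspace \
    mxnet_dff/python:gpu bash
```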
15 | ## 2. Prepare Data and Pretrained Model 16 | ### Cityscapes Data 17 | Download the Cityscapes data from the official webpage and unzip it. 18 | Put the data into `data/cityscapes`; you can use a soft link to set the data path, as follows: 19 | `ln -s Dataset_path ./data/cityscapes` 20 | 21 | If you want to try DFF, you also need to download the Cityscapes video data and put it into `data/cityscapes_video` 22 | 23 | ### Pretrained Model 24 | Download the pretrained ResNet model and FlowNet from [OneDrive](https://onedrive.live.com/?authkey=%21AAXQgYjWim3Iz6w&cid=F371D9563727B96F&id=F371D9563727B96F%21102798&parId=F371D9563727B96F%21102795&action=locate), and put the models into `model/pretrained_model/`: 25 | ```bash 26 | ./model/pretrained_model/resnet_v1_101-0000.params 27 | ./model/pretrained_model/flownet-0000.params 28 | ``` 29 | 30 | ## 3. Train and Test 31 | ### Training Deeplab V2 32 | `python ./experiments/deeplab/deeplab_train_test.py --cfg ./experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml` 33 | ### Training Deeplab V2 Deformable 34 | `python ./experiments/deeplab/deeplab_train_test.py --cfg ./experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml` 35 | ### Training DFF Deeplab V2 36 | `python ./experiments/deeplab_dff/deeplab_dff_train.py --cfg ./experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml` 37 | 38 | ## 4. Performance 39 | TBD 40 | ## 5. TODO List 41 | - [x] Add Scripts 42 | - [ ] Add experiment results 43 | - [ ] Add support for Deeplab V3+ 44 | - [ ] Add BiSeNet 45 | ## 6. FAQ 46 | - The program hangs if your system OpenCV is 2.x while your opencv-python is 3.x 47 | 48 | ## 7. Acknowledgement 49 | 50 | Thanks to the official Deep Feature Flow implementation and Deeplab implementation from msracver 51 | - [Deep Feature Flow](https://github.com/msracver/Deep-Feature-Flow) 52 | - [Deformable ConvNets](https://github.com/msracver/Deformable-ConvNets) 53 | -------------------------------------------------------------------------------- /data/cityscapes: -------------------------------------------------------------------------------- 1 | /home/PublicDataset/cityscapes -------------------------------------------------------------------------------- /deeplab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/__init__.py -------------------------------------------------------------------------------- /deeplab/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Zheng Zhang 7 | # -------------------------------------------------------- 8 | 9 | import os.path as osp 10 | import sys 11 | 12 | def add_path(path): 13 | if path not in sys.path: 14 | sys.path.insert(0, path) 15 | 16 | this_dir = osp.dirname(__file__) 17 | 18 | lib_path = osp.join(this_dir, '..', 'lib') 19 | add_path(lib_path) 20 | -------------------------------------------------------------------------------- /deeplab/config/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/config/__init__.py -------------------------------------------------------------------------------- /deeplab/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import yaml 12 | import numpy as np 13 | from easydict import EasyDict as edict 14 | 15 | config = edict() 16 | 17 | config.MXNET_VERSION = '' 18 | config.output_path = '' 19 | config.symbol = '' 20 | config.gpus = '' 21 | config.CLASS_AGNOSTIC = True 22 | config.SCALES = [(360, 600)] # first is scale (the shorter side); second is max size 23 | 24 | # default training 25 | config.default = edict() 26 | config.default.frequent = 1000 27 | config.default.kvstore = 'device' 28 | 29 | # network related params 30 | config.network = edict() 31 | config.network.pretrained = '../model/pretrained_model/resnet_v1-101' 32 | config.network.pretrained_epoch = 0 33 | config.network.PIXEL_MEANS = np.array([103.06, 115.90, 123.15]) 34 | config.network.IMAGE_STRIDE = 0 35 | config.network.FIXED_PARAMS = ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'] 36 | 37 | # dataset related params 38 | config.dataset = edict() 39 | config.dataset.dataset = 'cityscapes' 40 | config.dataset.image_set = 'leftImg8bit_train' 41 | config.dataset.test_image_set = 'leftImg8bit_val' 42 | config.dataset.root_path = '../data' 43 | config.dataset.dataset_path = '../data/cityscapes' 44 | config.dataset.NUM_CLASSES = 19 45 | config.dataset.annotation_prefix = 'gtFine' 46 | 47 | config.TRAIN = edict() 48 | config.TRAIN.lr = 0 49 | config.TRAIN.lr_step = '' 50 | config.TRAIN.warmup = False 51 | config.TRAIN.warmup_lr = 0 52 | config.TRAIN.warmup_step = 0 53 | config.TRAIN.momentum = 0.9 54 | config.TRAIN.wd = 0.0005 55 | config.TRAIN.begin_epoch = 0 56 | config.TRAIN.end_epoch = 0 57 | config.TRAIN.model_prefix = 'deeplab' 58 | 59 | # whether resume training 60 | config.TRAIN.RESUME = False 61 | # whether flip image 62 | config.TRAIN.FLIP = True 63 | # whether shuffle image 64 | config.TRAIN.SHUFFLE = True 65 | # whether use OHEM 66 | config.TRAIN.ENABLE_OHEM = False 67 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 68 | config.TRAIN.BATCH_IMAGES = 1 69 | 70 | config.TEST = edict() 71 | # size of images for each device 72 | config.TEST.BATCH_IMAGES = 1 73 | 74 | # Test Model Epoch 75 | config.TEST.test_epoch = 0 76 | 77 | def update_config(config_file): 78 | exp_config = None 79 | with open(config_file) as f: 80 | exp_config = edict(yaml.load(f)) 81 | for k, v in exp_config.items(): 82 | if k in config: 83 | if isinstance(v, dict): 84 | if k == 'TRAIN': 85 | if 'BBOX_WEIGHTS' in v: 86 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 87 | elif k == 'network': 88 | if 'PIXEL_MEANS' in v: 89 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 90 | for vk, vv in v.items(): 91 | config[k][vk] = vv 92 | else: 93 | if k == 'SCALES': 94 | config[k][0] = (tuple(v)) 95 | else: 96 | config[k] = v 97 | else: 98 | raise ValueError("key must exist in config.py") 99 | 
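The YAML files under `experiments/*/cfgs` override these defaults via `update_config`; here is a minimal sketch of that behavior (not part of the repo — it assumes you run it from the `deeplab/` directory so the `config` package is importable):
```python
from config.config import config, update_config

# library defaults defined above
assert config.gpus == ''
assert config.TRAIN.lr == 0

# merge an experiment file into the defaults;
# top-level keys must already exist in config.py, or ValueError is raised
update_config('../experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml')

print(config.gpus)      # '0,1,2,3', taken from the YAML
print(config.TRAIN.lr)  # 0.0005, taken from the YAML TRAIN block
```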
-------------------------------------------------------------------------------- /deeplab/config/dff_config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import yaml 12 | import numpy as np 13 | from easydict import EasyDict as edict 14 | 15 | config = edict() 16 | 17 | config.MXNET_VERSION = '' 18 | config.output_path = '' 19 | config.symbol = '' 20 | config.gpus = '' 21 | config.CLASS_AGNOSTIC = True 22 | config.SCALES = [(360, 600)] # first is scale (the shorter side); second is max size 23 | config.final_output_path = '' 24 | # default training 25 | config.default = edict() 26 | config.default.frequent = 1000 27 | config.default.kvstore = 'device' 28 | 29 | # network related params 30 | config.network = edict() 31 | config.network.pretrained = '../model/pretrained_model/flownet' 32 | config.network.pretrained_epoch = 0 33 | config.network.PIXEL_MEANS = np.array([103.06, 115.90, 123.15]) 34 | config.network.IMAGE_STRIDE = 0 35 | config.network.FIXED_PARAMS = ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'] 36 | 37 | # dataset related params 38 | config.dataset = edict() 39 | config.dataset.dataset = 'cityscapes' 40 | config.dataset.image_set = 'leftImg8bit_train' 41 | config.dataset.test_image_set = 'leftImg8bit_val' 42 | config.dataset.root_path = '../data' 43 | config.dataset.dataset_path = '../data/cityscapes' 44 | config.dataset.NUM_CLASSES = 19 45 | config.dataset.annotation_prefix = 'gtFine' 46 | config.dataset.dataset_video_path = './data/cityscapes_video/leftImg8bit_sequence' 47 | config.dataset.video_gt_path = './data/cityscapes_video/gtFine_sequence_own' # video sequence gt folder 48 | 49 | config.TRAIN = edict() 50 | config.TRAIN.lr = 0 51 | config.TRAIN.lr_step = '' 52 | config.TRAIN.warmup = False 53 | config.TRAIN.warmup_lr = 0 54 | config.TRAIN.warmup_step = 0 55 | config.TRAIN.momentum = 0.9 56 | config.TRAIN.wd = 0.0005 57 | config.TRAIN.begin_epoch = 0 58 | config.TRAIN.end_epoch = 0 59 | config.TRAIN.model_prefix = 'deeplab' 60 | 61 | # for video segmentation 62 | config.TRAIN.MIN_OFFSET = -5 63 | config.TRAIN.MAX_OFFSET = 1 64 | 65 | # whether resume training 66 | config.TRAIN.RESUME = False 67 | # whether flip image 68 | config.TRAIN.FLIP = True 69 | # whether shuffle image 70 | config.TRAIN.SHUFFLE = True 71 | # whether use OHEM 72 | config.TRAIN.ENABLE_OHEM = False 73 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 74 | config.TRAIN.BATCH_IMAGES = 1 75 | 76 | # Video mode:(for load data) 77 | config.TRAIN.VIDEO = True 78 | # for tensorboard file 79 | config.TRAIN.TB_DIR = '' 80 | 81 | config.TEST = edict() 82 | # size of images for each device 83 | config.TEST.BATCH_IMAGES = 1 84 | config.TEST.OFFSET = -1 85 | # Test Model Epoch 86 | config.TEST.test_epoch = 0 87 | 88 | def update_config(config_file): 89 | exp_config = None 90 | with open(config_file) as f: 91 | exp_config = edict(yaml.load(f)) 92 | for k, v in exp_config.items(): 93 | if k in config: 94 | if isinstance(v, dict): 95 | if k == 'TRAIN': 96 | if 'BBOX_WEIGHTS' in v: 97 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 
98 | elif k == 'network': 99 | if 'PIXEL_MEANS' in v: 100 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 101 | for vk, vv in v.items(): 102 | config[k][vk] = vv 103 | else: 104 | if k == 'SCALES': 105 | config[k][0] = (tuple(v)) 106 | else: 107 | config[k] = v 108 | else: 109 | raise ValueError("key must exist in config.py") 110 | -------------------------------------------------------------------------------- /deeplab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/core/__init__.py -------------------------------------------------------------------------------- /deeplab/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import time 12 | import logging 13 | import mxnet as mx 14 | # from lib.logger.visdomlogger import VisdomPlotLogger 15 | class Speedometer(object): 16 | def __init__(self, batch_size, frequent=50): 17 | self.batch_size = batch_size 18 | self.frequent = frequent 19 | self.init = False 20 | self.tic = 0 21 | self.last_count = 0 22 | self.loss_idx_abs = 0 # remember total index from the first epoch 23 | # self.train_loss_logger = VisdomPlotLogger('line', env='deeplab_duc_dff',opts={'title': 'Train FCNLoss'}) 24 | def __call__(self, param): 25 | """Callback to Show speed.""" 26 | count = param.nbatch 27 | if self.last_count > count: 28 | self.init = False 29 | self.last_count = count 30 | 31 | if self.init: 32 | if count % self.frequent == 0: 33 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 34 | s = '' 35 | if param.eval_metric is not None: 36 | name, value = param.eval_metric.get() 37 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 38 | for n, v in zip(name, value): 39 | s += "%s=%f,\t" % (n, v) 40 | if n == 'FCNLogLoss': 41 | self.loss_idx_abs += count 42 | FCNLogLoss = v 43 | # self.train_loss_logger.log(self.loss_idx_abs,FCNLogLoss) 44 | else: 45 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 46 | 47 | logging.info(s) 48 | print(s) 49 | self.tic = time.time() 50 | else: 51 | self.init = True 52 | self.tic = time.time() 53 | -------------------------------------------------------------------------------- /deeplab/core/metric.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | import numpy as np 13 | 14 | class FCNLogLossMetric(mx.metric.EvalMetric): 15 | def __init__(self, show_interval): 16 | super(FCNLogLossMetric, self).__init__('FCNLogLoss') 17 | 
self.show_interval = show_interval 18 | self.sum_metric = 0 19 | self.num_inst = 0 20 | 21 | def update(self, labels, preds): 22 | pred = preds[0] 23 | label = labels[0] 24 | 25 | # label (b, p) 26 | label = label.asnumpy().astype('int32').reshape((-1)) 27 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 28 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 29 | pred = pred.reshape((label.shape[0], -1)) 30 | 31 | # filter with keep_inds 32 | keep_inds = np.where(label != 255)[0] 33 | label = label[keep_inds] 34 | cls = pred[keep_inds, label] 35 | 36 | cls += 1e-14 37 | cls_loss = -1 * np.log(cls) 38 | cls_loss = np.sum(cls_loss) 39 | 40 | self.sum_metric += cls_loss 41 | self.num_inst += label.shape[0] 42 | -------------------------------------------------------------------------------- /deeplab/core/tester.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import cPickle 12 | import os 13 | import time 14 | import mxnet as mx 15 | import numpy as np 16 | 17 | from PIL import Image 18 | from module import MutableModule 19 | # from config.config import config 20 | from utils import image 21 | from utils.PrefetchingIter import PrefetchingIter 22 | 23 | 24 | class Predictor(object): 25 | def __init__(self, symbol, data_names, label_names, 26 | context=mx.cpu(), max_data_shapes=None, 27 | provide_data=None, provide_label=None, 28 | arg_params=None, aux_params=None): 29 | self._mod = MutableModule(symbol, data_names, label_names, 30 | context=context, max_data_shapes=max_data_shapes) 31 | self._mod.bind(provide_data, provide_label, for_training=False) 32 | self._mod.init_params(arg_params=arg_params, aux_params=aux_params) 33 | 34 | def predict(self, data_batch): 35 | self._mod.forward(data_batch) 36 | # [dict(zip(self._mod.output_names, _)) for _ in zip(*self._mod.get_outputs(merge_multi_context=False))] 37 | return [dict(zip(self._mod.output_names, _)) for _ in zip(*self._mod.get_outputs(merge_multi_context=False))] 38 | 39 | def pred_eval(predictor, test_data, imdb, vis=False, ignore_cache=None, logger=None): 40 | """ 41 | wrapper for calculating offline validation for faster data analysis 42 | in this example, all threshold are set by hand 43 | :param predictor: Predictor 44 | :param test_data: data iterator, must be non-shuffle 45 | :param imdb: image database 46 | :param vis: controls visualization 47 | :param ignore_cache: ignore the saved cache file 48 | :param logger: the logger instance 49 | :return: 50 | """ 51 | res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl') 52 | if os.path.exists(res_file) and not ignore_cache: 53 | with open(res_file , 'rb') as fid: 54 | evaluation_results = cPickle.load(fid) 55 | print 'evaluate segmentation: \n' 56 | if logger: 57 | logger.info('evaluate segmentation: \n') 58 | 59 | meanIU = evaluation_results['meanIU'] 60 | IU_array = evaluation_results['IU_array'] 61 | print 'IU_array:\n' 62 | if logger: 63 | logger.info('IU_array:\n') 64 | for i in range(len(IU_array)): 65 | print '%.5f'%IU_array[i] 66 | if logger: 67 | 
logger.info('%.5f'%IU_array[i]) 68 | print 'meanIU:%.5f'%meanIU 69 | if logger: 70 | logger.info( 'meanIU:%.5f'%meanIU) 71 | return 72 | 73 | assert vis or not test_data.shuffle 74 | if not isinstance(test_data, PrefetchingIter): 75 | test_data = PrefetchingIter(test_data) 76 | 77 | num_images = imdb.num_images 78 | all_segmentation_result = [[] for _ in xrange(num_images)] 79 | idx = 0 80 | 81 | data_time, net_time, post_time = 0.0, 0.0, 0.0 82 | t = time.time() 83 | for data_batch in test_data: 84 | t1 = time.time() - t 85 | t = time.time() 86 | output_all = predictor.predict(data_batch) 87 | output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all] 88 | t2 = time.time() - t 89 | t = time.time() 90 | 91 | all_segmentation_result[idx: idx+test_data.batch_size] = [output.astype('int8') for output in output_all] 92 | 93 | idx += test_data.batch_size 94 | t3 = time.time() - t 95 | t = time.time() 96 | 97 | data_time += t1 98 | net_time += t2 99 | post_time += t3 100 | print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, imdb.num_images, data_time / idx * test_data.batch_size, net_time / idx * test_data.batch_size, post_time / idx * test_data.batch_size) 101 | if logger: 102 | logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, imdb.num_images, data_time / idx * test_data.batch_size, net_time / idx * test_data.batch_size, post_time / idx * test_data.batch_size)) 103 | 104 | evaluation_results = imdb.evaluate_segmentations(all_segmentation_result) 105 | 106 | if not os.path.exists(res_file) or ignore_cache: 107 | with open(res_file, 'wb') as f: 108 | cPickle.dump(evaluation_results, f, protocol=cPickle.HIGHEST_PROTOCOL) 109 | 110 | print 'evaluate segmentation: \n' 111 | if logger: 112 | logger.info('evaluate segmentation: \n') 113 | 114 | meanIU = evaluation_results['meanIU'] 115 | IU_array = evaluation_results['IU_array'] 116 | print 'IU_array:\n' 117 | if logger: 118 | logger.info('IU_array:\n') 119 | for i in range(len(IU_array)): 120 | print '%.5f'%IU_array[i] 121 | if logger: 122 | logger.info('%.5f'%IU_array[i]) 123 | print 'meanIU:%.5f'%meanIU 124 | if logger: 125 | logger.info( 'meanIU:%.5f'%meanIU) 126 | -------------------------------------------------------------------------------- /deeplab/dff_test.py: -------------------------------------------------------------------------------- 1 | 2 | import _init_paths 3 | 4 | import argparse 5 | import os 6 | import sys 7 | import time 8 | import logging 9 | from config.dff_config import config, update_config 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Test a Deeplab Network') 13 | # general 14 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 15 | 16 | args, rest = parser.parse_known_args() 17 | update_config(args.cfg) 18 | 19 | # testing 20 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 21 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 22 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 23 | args = parser.parse_args() 24 | return args 25 | 26 | args = parse_args() 27 | curr_path = os.path.abspath(os.path.dirname(__file__)) 28 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 29 | 30 | import pprint 31 | import mxnet as mx 32 | 33 | from symbols import * 34 | from dataset import * 35 | from core.loader 
import TestDataLoaderVideo 36 | 37 | from core.tester import Predictor, pred_eval 38 | from utils.load_data import load_gt_segdb, merge_segdb 39 | from utils.load_model import load_param 40 | from utils.create_logger import create_logger 41 | 42 | def test_deeplab(): 43 | epoch = config.TEST.test_epoch 44 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 45 | image_set = config.dataset.test_image_set 46 | root_path = config.dataset.root_path 47 | dataset = config.dataset.dataset 48 | dataset_path = config.dataset.dataset_path 49 | 50 | logger, final_output_path = create_logger(config.output_path, args.cfg, image_set) 51 | prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix) 52 | 53 | # print config 54 | pprint.pprint(config) 55 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 56 | 57 | # load symbol and testing data 58 | sym_instance = eval(config.symbol + '.' + config.symbol)() 59 | sym = sym_instance.get_symbol(config, is_train=False) 60 | 61 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path) 62 | segdb = imdb.gt_segdb() 63 | 64 | # get test data iter 65 | test_data = TestDataLoaderVideo(segdb, config=config, batch_size=len(ctx)) 66 | 67 | # infer shape 68 | data_shape_dict = dict(test_data.provide_data_single) 69 | print data_shape_dict 70 | sym_instance.infer_shape(data_shape_dict) 71 | 72 | # load model and check parameters 73 | arg_params, aux_params = load_param(prefix, epoch, process=True) 74 | 75 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 76 | 77 | # decide maximum shape 78 | data_names = [k[0] for k in test_data.provide_data_single] 79 | label_names = ['softmax_label'] 80 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),]] 81 | 82 | # create predictor 83 | predictor = Predictor(sym, data_names, label_names, 84 | context=ctx, max_data_shapes=max_data_shape, 85 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 86 | arg_params=arg_params, aux_params=aux_params) 87 | 88 | # start detection 89 | pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger) 90 | 91 | def main(): 92 | print args 93 | test_deeplab() 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /deeplab/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/deeplab/function/__init__.py -------------------------------------------------------------------------------- /deeplab/function/reeval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import argparse 12 | import cPickle 13 | import os 14 | import mxnet as mx 15 | 16 | from config.config import config, generate_config 17 | from dataset 
import * 18 | 19 | 20 | def reeval(args): 21 | # load imdb 22 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 23 | 24 | # load detection results 25 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 26 | with open(cache_file) as f: 27 | detections = cPickle.load(f) 28 | 29 | # eval 30 | imdb.evaluate_detections(detections) 31 | 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser(description='imdb test') 35 | # general 36 | parser.add_argument('--network', help='network name', default=default.network, type=str) 37 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 38 | args, rest = parser.parse_known_args() 39 | generate_config(args.network, args.dataset) 40 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 41 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 42 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 43 | # other 44 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def main(): 50 | args = parse_args() 51 | print 'Called with argument:', args 52 | reeval(args) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /deeplab/function/test_deeplab.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import argparse 12 | import pprint 13 | import logging 14 | import time 15 | import os 16 | import mxnet as mx 17 | 18 | from config.config import config, generate_config, update_config 19 | from config.dataset_conf import dataset 20 | from config.network_conf import network 21 | from symbols import * 22 | from dataset import * 23 | from core.loader import TestDataLoader 24 | from core.tester import Predictor, pred_eval 25 | from utils.load_model import load_param 26 | 27 | def test_deeplab(network, dataset, image_set, root_path, dataset_path, 28 | ctx, prefix, epoch, 29 | vis, logger=None, output_path=None): 30 | if not logger: 31 | assert False, 'require a logger' 32 | 33 | # print config 34 | pprint.pprint(config) 35 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 36 | 37 | # load symbol and testing data 38 | sym = eval('get_' + network + '_test')(num_classes=config.dataset.NUM_CLASSES) 39 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 40 | segdb = imdb.gt_segdb() 41 | 42 | # get test data iter 43 | test_data = TestDataLoader(segdb, batch_size=len(ctx)) 44 | 45 | # load model 46 | # arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 47 | arg_params, aux_params = load_param(prefix, epoch, process=True) 48 | 49 | # infer shape 50 | data_shape_dict = dict(test_data.provide_data_single) 51 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 52 | arg_shape_dict = dict(zip(sym.list_arguments(), 
arg_shape)) 53 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 54 | 55 | # check parameters 56 | for k in sym.list_arguments(): 57 | if k in data_shape_dict or k in ['softmax_label']: 58 | continue 59 | assert k in arg_params, k + ' not initialized' 60 | assert arg_params[k].shape == arg_shape_dict[k], \ 61 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 62 | for k in sym.list_auxiliary_states(): 63 | assert k in aux_params, k + ' not initialized' 64 | assert aux_params[k].shape == aux_shape_dict[k], \ 65 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 66 | 67 | # decide maximum shape 68 | data_names = [k[0] for k in test_data.provide_data_single] 69 | label_names = ['softmax_label'] 70 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] 71 | 72 | # create predictor 73 | predictor = Predictor(sym, data_names, label_names, 74 | context=ctx, max_data_shapes=max_data_shape, 75 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 76 | arg_params=arg_params, aux_params=aux_params) 77 | 78 | # start detection 79 | pred_eval(predictor, test_data, imdb, vis=vis, logger=logger) 80 | 81 | -------------------------------------------------------------------------------- /deeplab/symbols/__init__.py: -------------------------------------------------------------------------------- 1 | import resnet_v1_101_deeplab 2 | import resnet_v1_101_deeplab_dcn 3 | import resnet_v1_101_deeplab_video 4 | import resnet_v1_101_deeplab_video_dcn 5 | # Capsule Layer For Segmentation 6 | # import resnet_v1_101_deeplab_capsule 7 | # import densenet_bc_deeplab_base 8 | # import densenet_bc_deeplab_base_v2 -------------------------------------------------------------------------------- /deeplab/symbols/print_summary.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import json 12 | from mxnet import Symbol 13 | def print_summary(symbol, shape=None, line_length=120, positions=[.44, .64, .74, 1.]): 14 | """Print a layer-by-layer summary of the symbol. 15 | 16 | Parameters 17 | ---------- 18 | symbol: Symbol 19 | Symbol to be visualized. 20 | shape: dict 21 | A dict of shapes, str->shape (tuple), given input shapes. 22 | line_length: int 23 | Total length of printed lines 24 | positions: list 25 | Relative or absolute positions of log elements in each line.
26 | Returns 27 | ------ 28 | None 29 | """ 30 | if not isinstance(symbol, Symbol): 31 | raise TypeError("symbol must be Symbol") 32 | show_shape = False 33 | if shape is not None: 34 | show_shape = True 35 | interals = symbol.get_internals() 36 | _, out_shapes, _ = interals.infer_shape(**shape) 37 | if out_shapes is None: 38 | raise ValueError("Input shape is incomplete") 39 | shape_dict = dict(zip(interals.list_outputs(), out_shapes)) 40 | conf = json.loads(symbol.tojson()) 41 | nodes = conf["nodes"] 42 | heads = set(conf["heads"][0]) 43 | if positions[-1] <= 1: 44 | positions = [int(line_length * p) for p in positions] 45 | # header names for the different log elements 46 | to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Previous Layer'] 47 | def print_row(fields, positions): 48 | """Print format row. 49 | 50 | Parameters 51 | ---------- 52 | fields: list 53 | Information field. 54 | positions: list 55 | Field length ratio. 56 | Returns 57 | ------ 58 | None 59 | """ 60 | line = '' 61 | for i, field in enumerate(fields): 62 | line += str(field) 63 | line = line[:positions[i]] 64 | line += ' ' * (positions[i] - len(line)) 65 | print(line) 66 | print('_' * line_length) 67 | print_row(to_display, positions) 68 | print('=' * line_length) 69 | def print_layer_summary(node, out_shape): 70 | """print layer information 71 | 72 | Parameters 73 | ---------- 74 | node: dict 75 | Node information. 76 | out_shape: dict 77 | Node shape information. 78 | Returns 79 | ------ 80 | Node total parameters. 81 | """ 82 | op = node["op"] 83 | pre_node = [] 84 | pre_filter = 0 85 | if op != "null": 86 | inputs = node["inputs"] 87 | for item in inputs: 88 | input_node = nodes[item[0]] 89 | input_name = input_node["name"] 90 | if input_node["op"] != "null" or item[0] in heads: 91 | # add precede 92 | pre_node.append(input_name) 93 | if show_shape: 94 | if input_node["op"] != "null": 95 | key = input_name + "_output" 96 | else: 97 | key = input_name 98 | if key in shape_dict: 99 | shape = shape_dict[key][1:] 100 | pre_filter = pre_filter + int(shape[0]) 101 | cur_param = 0 102 | if op == 'Convolution': 103 | if ("no_bias" in node["attrs"]) and (isinstance(node["attrs"]["no_bias"],(bool,int)) and int(node["attrs"]["no_bias"])) or ((isinstance(node["attrs"]["no_bias"],(str)) and bool(node["attrs"]["no_bias"]))): 104 | cur_param = pre_filter * int(node["attrs"]["num_filter"]) 105 | for k in _str2tuple(node["attrs"]["kernel"]): 106 | cur_param *= int(k) 107 | else: 108 | cur_param = pre_filter * int(node["attrs"]["num_filter"]) 109 | for k in _str2tuple(node["attrs"]["kernel"]): 110 | cur_param *= int(k) 111 | cur_param += int(node["attrs"]["num_filter"]) 112 | elif op == 'FullyConnected': 113 | if ("no_bias" in node["attrs"]) and int(node["attrs"]["no_bias"]): 114 | cur_param = pre_filter * (int(node["attrs"]["num_hidden"])) 115 | else: 116 | cur_param = (pre_filter+1) * (int(node["attrs"]["num_hidden"])) 117 | elif op == 'BatchNorm': 118 | key = node["name"] + "_output" 119 | if show_shape: 120 | num_filter = shape_dict[key][1] 121 | cur_param = int(num_filter) * 2 122 | if not pre_node: 123 | first_connection = '' 124 | else: 125 | first_connection = pre_node[0] 126 | fields = [node['name'] + '(' + op + ')', 127 | "x".join([str(x) for x in out_shape]), 128 | cur_param, 129 | first_connection] 130 | print_row(fields, positions) 131 | if len(pre_node) > 1: 132 | for i in range(1, len(pre_node)): 133 | fields = ['', '', '', pre_node[i]] 134 | print_row(fields, positions) 135 | return cur_param 136 | 
total_params = 0 137 | for i, node in enumerate(nodes): 138 | out_shape = [] 139 | op = node["op"] 140 | if op == "null" and i > 0: 141 | continue 142 | if op != "null" or i in heads: 143 | if show_shape: 144 | if op != "null": 145 | key = node["name"] + "_output" 146 | else: 147 | key = node["name"] 148 | if key in shape_dict: 149 | out_shape = shape_dict[key][1:] 150 | total_params += print_layer_summary(nodes[i], out_shape) 151 | if i == len(nodes) - 1: 152 | print('=' * line_length) 153 | else: 154 | print('_' * line_length) 155 | print('Total params: %s' % total_params) 156 | print('_' * line_length) -------------------------------------------------------------------------------- /deeplab/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # -------------------------------------------------------- 8 | 9 | import _init_paths 10 | 11 | import argparse 12 | import os 13 | import sys 14 | import time 15 | import logging 16 | from config.config import config, update_config 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(description='Test a Deeplab Network') 20 | # general 21 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 22 | 23 | args, rest = parser.parse_known_args() 24 | update_config(args.cfg) 25 | 26 | # testing 27 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 28 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 29 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 30 | args = parser.parse_args() 31 | return args 32 | 33 | args = parse_args() 34 | curr_path = os.path.abspath(os.path.dirname(__file__)) 35 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 36 | 37 | import pprint 38 | import mxnet as mx 39 | 40 | from symbols import * 41 | from dataset import * 42 | from core.loader import TestDataLoader 43 | from core.tester import Predictor, pred_eval 44 | from utils.load_data import load_gt_segdb, merge_segdb 45 | from utils.load_model import load_param 46 | from utils.create_logger import create_logger 47 | 48 | def test_deeplab(): 49 | epoch = config.TEST.test_epoch 50 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 51 | image_set = config.dataset.test_image_set 52 | root_path = config.dataset.root_path 53 | dataset = config.dataset.dataset 54 | dataset_path = config.dataset.dataset_path 55 | 56 | logger, final_output_path = create_logger(config.output_path, args.cfg, image_set) 57 | prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix) 58 | 59 | # print config 60 | pprint.pprint(config) 61 | logger.info('testing config:{}\n'.format(pprint.pformat(config))) 62 | 63 | # load symbol and testing data 64 | sym_instance = eval(config.symbol + '.' 
+ config.symbol)() 65 | sym = sym_instance.get_symbol(config, is_train=False) 66 | 67 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path) 68 | segdb = imdb.gt_segdb() 69 | 70 | # get test data iter 71 | test_data = TestDataLoader(segdb, config=config, batch_size=len(ctx)) 72 | 73 | # infer shape 74 | data_shape_dict = dict(test_data.provide_data_single) 75 | sym_instance.infer_shape(data_shape_dict) 76 | 77 | # load model and check parameters 78 | arg_params, aux_params = load_param(prefix, epoch, process=True) 79 | 80 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 81 | 82 | # decide maximum shape 83 | data_names = [k[0] for k in test_data.provide_data_single] 84 | label_names = ['softmax_label'] 85 | max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] 86 | 87 | # create predictor 88 | predictor = Predictor(sym, data_names, label_names, 89 | context=ctx, max_data_shapes=max_data_shape, 90 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 91 | arg_params=arg_params, aux_params=aux_params) 92 | 93 | # start detection 94 | pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger) 95 | 96 | def main(): 97 | print args 98 | test_deeplab() 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_cityscapes_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab 5 | gpus: '0' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_base.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: 
resnet_v1_101_deeplab 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_base" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_capsule.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab_capsule 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.00005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_capsule" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 512 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_dcn.yaml: -------------------------------------------------------------------------------- 
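A note on the training schedules used by all of the cfgs in this directory: lr_step lists the learning-rate drop points as (possibly fractional) epochs, e.g. '40.336' with end_epoch: 53, and the train scripts turn them into iteration counts from the dataset size and per-device batch size. A minimal sketch of that conversion, with illustrative names and numbers rather than the project's actual helpers:

    # Hedged sketch: convert fractional-epoch lr_step values (e.g. '40.336'
    # or '40.336, 60.504') into iteration indices for a step LR schedule.
    # num_images, batch_size and begin_epoch would come from the loaded cfg.
    def lr_step_to_iters(lr_step, num_images, batch_size, begin_epoch=0):
        epoch_size = num_images // batch_size        # iterations per epoch
        epochs = [float(e) for e in lr_step.split(',')]
        return [int(e * epoch_size) for e in epochs if e > begin_epoch]

    print(lr_step_to_iters('40.336', num_images=2975, batch_size=4))  # -> [29969]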
1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape" 4 | symbol: resnet_v1_101_deeplab_dcn 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: true 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 53 52 | lr: 0.0005 53 | lr_step: '40.336' 54 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: True 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 53 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_voc12_segmentation_base.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/voc12" 4 | symbol: resnet_v1_101_deeplab 5 | gpus: '0' 6 | SCALES: 7 | - 360 8 | - 600 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 21 14 | dataset: PascalVOC 15 | dataset_path: "./data/VOCdevkit2012/" 16 | image_set: 2012_train_seg 17 | root_path: "./data/" 18 | test_image_set: 2012_val_seg 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: false 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 12 52 | lr: 0.0005 53 | lr_step: '8' 54 | model_prefix: "deeplab_resnet_v1_101_voc12_segmentation_base" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: False 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 12 72 | -------------------------------------------------------------------------------- /experiments/deeplab/cfgs/deeplab_resnet_v1_101_voc12_segmentation_dcn.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/voc12" 4 | symbol: resnet_v1_101_deeplab_dcn 5 | gpus: '0' 6 | SCALES: 7 | - 360 8 | - 600 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 21 14 | dataset: PascalVOC 15 | dataset_path: "./data/VOCdevkit2012/" 16 | image_set: 2012_train_seg 17 | root_path: "./data/" 18 | test_image_set: 2012_val_seg 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_epoch: 0 45 | TRAIN: 46 | warmup: false 47 | warmup_lr: 0.00005 48 | # typically we will use 4000 warmup step for single GPU 49 | warmup_step: 1000 50 | begin_epoch: 0 51 | end_epoch: 12 52 | lr: 0.0005 53 | lr_step: '8' 54 | model_prefix: "deeplab_resnet_v1_101_voc12_segmentation_dcn" 55 | # whether flip image 56 | FLIP: true 57 | # size of images for each device 58 | BATCH_IMAGES: 1 59 | # wheter crop image during training 60 | ENABLE_CROP: False 61 | # scale of cropped image during training 62 | CROP_HEIGHT: 768 63 | CROP_WIDTH: 1024 64 | # whether resume training 65 | RESUME: false 66 | # whether shuffle image 67 | SHUFFLE: true 68 | TEST: 69 | # size of images for each device 70 | BATCH_IMAGES: 1 71 | test_epoch: 12 72 | -------------------------------------------------------------------------------- /experiments/deeplab/deeplab_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | import test 20 | 21 | if __name__ == "__main__": 22 | test.main() 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /experiments/deeplab/deeplab_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 
'deeplab')) 18 | 19 | import train 20 | import test 21 | 22 | if __name__ == "__main__": 23 | train.main() 24 | # test.main() 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape_dff_dcn" 4 | symbol: resnet_v1_101_deeplab_video 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape_Video 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_flow: "./model/pretrained_model/flownet" 45 | pretrained_epoch: 0 46 | TRAIN: 47 | warmup: true 48 | warmup_lr: 0.00005 49 | # typically we will use 4000 warmup step for single GPU 50 | warmup_step: 1000 51 | begin_epoch: 0 52 | end_epoch: 72 53 | lr: 0.0005 54 | lr_step: '40.336, 60.504' 55 | model_prefix: "deeplab_resnet_v1_101_cityscapes_segmentation_video" 56 | # whether flip image 57 | FLIP: true 58 | # size of images for each device 59 | BATCH_IMAGES: 1 60 | # wheter crop image during training 61 | ENABLE_CROP: True 62 | # scale of cropped image during training 63 | CROP_HEIGHT: 768 64 | CROP_WIDTH: 1024 65 | # whether resume training 66 | RESUME: false 67 | # whether shuffle image 68 | SHUFFLE: true 69 | # tensorboard log dir 70 | TB_DIR: "./output/cityscape/tensorboard" 71 | TEST: 72 | # size of images for each device 73 | BATCH_IMAGES: 1 74 | OFFSET: -1 75 | test_epoch: 72 76 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/cfgs/deeplab_resnet_v1_101_cityscapes_segmentation_video_duc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/cityscape_dff_dcn" 4 | symbol: resnet_v1_101_deeplab_video_dcn 5 | gpus: '0,1,2,3' 6 | SCALES: 7 | - 1024 8 | - 2048 9 | default: 10 | frequent: 10 11 | kvstore: device 12 | dataset: 13 | NUM_CLASSES: 19 14 | dataset: CityScape_Video 15 | dataset_path: "./data/cityscapes/" 16 | image_set: leftImg8bit_train 17 | root_path: "./data/" 18 | test_image_set: leftImg8bit_val 19 | network: 20 | FIXED_PARAMS: 21 | - conv1 22 | - bn_conv1 23 | - res2 24 | - bn2 25 | - gamma 26 | - beta 27 | FIXED_PARAMS_SHARED: 28 | - conv1 29 | - bn_conv1 30 | - res2 31 | - bn2 32 | - res3 33 | - bn3 34 | - res4 35 | - bn4 36 | - gamma 37 | - beta 38 | IMAGE_STRIDE: 0 39 | PIXEL_MEANS: 40 | - 103.06 41 | - 115.90 42 | - 123.15 43 | pretrained: "./model/pretrained_model/resnet_v1_101" 44 | pretrained_flow: "./model/pretrained_model/flownet" 45 | pretrained_epoch: 0 46 | TRAIN: 47 | warmup: true 48 | warmup_lr: 0.00005 49 | # typically we will use 4000 warmup step for single GPU 50 | warmup_step: 1000 51 | begin_epoch: 0 52 | end_epoch: 72 53 | lr: 0.0005 54 | lr_step: '40.336, 60.504' 55 | model_prefix: 
"deeplab_resnet_v1_101_cityscapes_segmentation_video_dcn" 56 | # whether flip imagep 57 | FLIP: true 58 | # size of images for each device 59 | BATCH_IMAGES: 1 60 | # wheter crop image during training 61 | ENABLE_CROP: True 62 | # scale of cropped image during training 63 | CROP_HEIGHT: 768 64 | CROP_WIDTH: 1024 65 | # whether resume training 66 | RESUME: false 67 | # whether shuffle image 68 | SHUFFLE: true 69 | # tensorboard log dir 70 | TB_DIR: "./output/cityscape/tensorboard" 71 | TEST: 72 | # size of images for each device 73 | BATCH_IMAGES: 1 74 | OFFSET: -3 75 | test_epoch: 64 76 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/deeplab_dff_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | # import dff_train 20 | import dff_test 21 | 22 | if __name__ == "__main__": 23 | # dff_train.main() 24 | dff_test.main() 25 | -------------------------------------------------------------------------------- /experiments/deeplab_dff/deeplab_dff_train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import sys 13 | os.environ['PYTHONUNBUFFERED'] = '1' 14 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 15 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 16 | this_dir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'deeplab')) 18 | 19 | import dff_train 20 | # import dff_test 21 | 22 | if __name__ == "__main__": 23 | dff_train.main() 24 | # test.main() 25 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_coco_trainval_rcnn_dcn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/coco" 4 | symbol: resnet_v1_101_rcnn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 
| - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rcnn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: false 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: 128 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.1 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_coco_trainval_rcnn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/coco" 4 | symbol: resnet_v1_101_rcnn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 
26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rcnn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: false 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: 128 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.1 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_voc0712_rcnn_dcn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/voc" 4 | symbol: resnet_v1_101_rcnn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | 
RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rcnn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: false 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: 128 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.1 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/cfgs/resnet_v1_101_voc0712_rcnn_end2end.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rcnn/voc" 4 | symbol: resnet_v1_101_rcnn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: false 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 
103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rcnn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: false 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: 128 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.1 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | import os 
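# The environment settings a few lines below are shared by every launcher
# script under experiments/: PYTHONUNBUFFERED=1 flushes log output
# immediately, MXNET_CUDNN_AUTOTUNE_DEFAULT=0 disables cuDNN autotuning so
# per-iteration timing stays stable, and MXNET_ENABLE_GPU_P2P=0 avoids
# peer-to-peer GPU copies, which can hang on some multi-GPU hosts.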
8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | #os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import train_end2end 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_end2end.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/faster_rcnn/rcnn_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'faster_rcnn')) 15 | 16 | import train_rcnn 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_rcnn.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/deform_conv_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: deform_conv_demo 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | 
TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 8 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/deform_psroi_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: deform_psroi_demo 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 
| proposal: selective_search 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 8 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: false 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: false 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_coco_trainval_rfcn_dcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn_dcn/coco" 4 | symbol: resnet_v1_101_rfcn_dcn 5 | gpus: '0,1,2,3,4,5,6,7' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | 
image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rfcn_dcn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_coco_trainval_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/coco" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0,1,2,3,4,5,6,7' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | 
NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: true 63 | warmup_lr: 0.00005 64 | # typically we will use 8000 warmup step for single GPU for COCO 65 | warmup_step: 1000 66 | begin_epoch: 0 67 | end_epoch: 8 68 | model_prefix: 'rfcn_coco' 69 | # whether resume training 70 | RESUME: false 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_voc0712_rfcn_dcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn_dcn/voc" 4 | symbol: resnet_v1_101_rfcn_dcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 
| - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/resnet_v1_101_voc0712_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn/voc" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - bn3 37 | - res4 38 | 
- bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 8 47 | - 16 48 | - 32 49 | NUM_ANCHORS: 9 50 | dataset: 51 | NUM_CLASSES: 21 52 | dataset: PascalVOC 53 | dataset_path: "./data/VOCdevkit" 54 | image_set: 2007_trainval+2012_trainval 55 | root_path: "./data" 56 | test_image_set: 2007_test 57 | proposal: rpn 58 | TRAIN: 59 | lr: 0.0005 60 | lr_step: '4.83' 61 | warmup: true 62 | warmup_lr: 0.00005 63 | # typically we will use 4000 warmup step for single GPU on VOC 64 | warmup_step: 1000 65 | begin_epoch: 0 66 | end_epoch: 7 67 | model_prefix: 'rfcn_voc' 68 | # whether resume training 69 | RESUME: false 70 | # whether flip image 71 | FLIP: true 72 | # whether shuffle image 73 | SHUFFLE: true 74 | # whether use OHEM 75 | ENABLE_OHEM: true 76 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 77 | BATCH_IMAGES: 1 78 | # e2e changes behavior of anchor loader and metric 79 | END2END: true 80 | # group images with similar aspect ratio 81 | ASPECT_GROUPING: true 82 | # R-CNN 83 | # rcnn rois batch size 84 | BATCH_ROIS: -1 85 | BATCH_ROIS_OHEM: 128 86 | # rcnn rois sampling params 87 | FG_FRACTION: 0.25 88 | FG_THRESH: 0.5 89 | BG_THRESH_HI: 0.5 90 | BG_THRESH_LO: 0.0 91 | # rcnn bounding box regression params 92 | BBOX_REGRESSION_THRESH: 0.5 93 | BBOX_WEIGHTS: 94 | - 1.0 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | 99 | # RPN anchor loader 100 | # rpn anchors batch size 101 | RPN_BATCH_SIZE: 256 102 | # rpn anchors sampling params 103 | RPN_FG_FRACTION: 0.5 104 | RPN_POSITIVE_OVERLAP: 0.7 105 | RPN_NEGATIVE_OVERLAP: 0.3 106 | RPN_CLOBBER_POSITIVES: false 107 | # rpn bounding box regression params 108 | RPN_BBOX_WEIGHTS: 109 | - 1.0 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | RPN_POSITIVE_WEIGHT: -1.0 114 | # used for end2end training 115 | # RPN proposal 116 | CXX_PROPOSAL: false 117 | RPN_NMS_THRESH: 0.7 118 | RPN_PRE_NMS_TOP_N: 6000 119 | RPN_POST_NMS_TOP_N: 300 120 | RPN_MIN_SIZE: 0 121 | # approximate bounding box regression 122 | BBOX_NORMALIZATION_PRECOMPUTED: true 123 | BBOX_MEANS: 124 | - 0.0 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | BBOX_STDS: 129 | - 0.1 130 | - 0.1 131 | - 0.2 132 | - 0.2 133 | TEST: 134 | # use rpn to generate proposal 135 | HAS_RPN: true 136 | # size of images for each device 137 | BATCH_IMAGES: 1 138 | # RPN proposal 139 | CXX_PROPOSAL: false 140 | RPN_NMS_THRESH: 0.7 141 | RPN_PRE_NMS_TOP_N: 6000 142 | RPN_POST_NMS_TOP_N: 300 143 | RPN_MIN_SIZE: 0 144 | # RPN generate proposal 145 | PROPOSAL_NMS_THRESH: 0.7 146 | PROPOSAL_PRE_NMS_TOP_N: 20000 147 | PROPOSAL_POST_NMS_TOP_N: 2000 148 | PROPOSAL_MIN_SIZE: 0 149 | # RCNN nms 150 | NMS: 0.3 151 | test_epoch: 7 152 | 153 | -------------------------------------------------------------------------------- /experiments/rfcn/cfgs/rfcn_coco_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/rfcn" 4 | symbol: resnet_v1_101_rfcn 5 | gpus: '0' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 20 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_epoch: 0 16 | PIXEL_MEANS: 17 | - 103.06 18 | - 115.90 19 | - 123.15 20 | IMAGE_STRIDE: 0 21 | RCNN_FEAT_STRIDE: 16 22 | RPN_FEAT_STRIDE: 16 23 | FIXED_PARAMS: 24 | - conv1 25 | - bn_conv1 26 | - res2 27 | - bn2 28 | - gamma 29 | - beta 30 | FIXED_PARAMS_SHARED: 31 | - conv1 32 | - bn_conv1 33 | - res2 34 | - bn2 35 | - res3 36 | - 
bn3 37 | - res4 38 | - bn4 39 | - gamma 40 | - beta 41 | ANCHOR_RATIOS: 42 | - 0.5 43 | - 1 44 | - 2 45 | ANCHOR_SCALES: 46 | - 4 47 | - 8 48 | - 16 49 | - 32 50 | NUM_ANCHORS: 12 51 | dataset: 52 | NUM_CLASSES: 81 53 | dataset: coco 54 | dataset_path: "./data/coco" 55 | image_set: train2014+val2014 56 | root_path: "./data" 57 | test_image_set: test-dev2015 58 | proposal: rpn 59 | TRAIN: 60 | lr: 0.0005 61 | lr_step: '5.333' 62 | warmup: false 63 | warmup_lr: 0.00005 64 | # typically we will use 4000 warmup step for single GPU 65 | warmup_step: 1000 66 | begin_epoch: 5 67 | end_epoch: 8 68 | model_prefix: 'e2e' 69 | # whether resume training 70 | RESUME: true 71 | # whether flip image 72 | FLIP: true 73 | # whether shuffle image 74 | SHUFFLE: true 75 | # whether use OHEM 76 | ENABLE_OHEM: true 77 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 78 | BATCH_IMAGES: 1 79 | # e2e changes behavior of anchor loader and metric 80 | END2END: true 81 | # group images with similar aspect ratio 82 | ASPECT_GROUPING: true 83 | # R-CNN 84 | # rcnn rois batch size 85 | BATCH_ROIS: -1 86 | BATCH_ROIS_OHEM: 128 87 | # rcnn rois sampling params 88 | FG_FRACTION: 0.25 89 | FG_THRESH: 0.5 90 | BG_THRESH_HI: 0.5 91 | BG_THRESH_LO: 0.0 92 | # rcnn bounding box regression params 93 | BBOX_REGRESSION_THRESH: 0.5 94 | BBOX_WEIGHTS: 95 | - 1.0 96 | - 1.0 97 | - 1.0 98 | - 1.0 99 | 100 | # RPN anchor loader 101 | # rpn anchors batch size 102 | RPN_BATCH_SIZE: 256 103 | # rpn anchors sampling params 104 | RPN_FG_FRACTION: 0.5 105 | RPN_POSITIVE_OVERLAP: 0.7 106 | RPN_NEGATIVE_OVERLAP: 0.3 107 | RPN_CLOBBER_POSITIVES: false 108 | # rpn bounding box regression params 109 | RPN_BBOX_WEIGHTS: 110 | - 1.0 111 | - 1.0 112 | - 1.0 113 | - 1.0 114 | RPN_POSITIVE_WEIGHT: -1.0 115 | # used for end2end training 116 | # RPN proposal 117 | CXX_PROPOSAL: false 118 | RPN_NMS_THRESH: 0.7 119 | RPN_PRE_NMS_TOP_N: 6000 120 | RPN_POST_NMS_TOP_N: 300 121 | RPN_MIN_SIZE: 0 122 | # approximate bounding box regression 123 | BBOX_NORMALIZATION_PRECOMPUTED: true 124 | BBOX_MEANS: 125 | - 0.0 126 | - 0.0 127 | - 0.0 128 | - 0.0 129 | BBOX_STDS: 130 | - 0.1 131 | - 0.1 132 | - 0.2 133 | - 0.2 134 | TEST: 135 | # use rpn to generate proposal 136 | HAS_RPN: true 137 | # size of images for each device 138 | BATCH_IMAGES: 1 139 | # RPN proposal 140 | CXX_PROPOSAL: false 141 | RPN_NMS_THRESH: 0.7 142 | RPN_PRE_NMS_TOP_N: 6000 143 | RPN_POST_NMS_TOP_N: 300 144 | RPN_MIN_SIZE: 0 145 | # RPN generate proposal 146 | PROPOSAL_NMS_THRESH: 0.7 147 | PROPOSAL_PRE_NMS_TOP_N: 20000 148 | PROPOSAL_POST_NMS_TOP_N: 2000 149 | PROPOSAL_MIN_SIZE: 0 150 | # RCNN nms 151 | NMS: 0.3 152 | test_epoch: 8 153 | max_per_image: 100 154 | 155 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_alternate_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import 
train_alternate 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_alternate.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | import os 8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | this_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 14 | 15 | import train_end2end 16 | import test 17 | 18 | if __name__ == "__main__": 19 | train_end2end.main() 20 | test.main() 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/rfcn/rfcn_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'rfcn')) 15 | 16 | import train_rfcn 17 | import test 18 | 19 | if __name__ == "__main__": 20 | train_rfcn.main() 21 | test.main() 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ./data 4 | mkdir -p ./output 5 | mkdir -p ./external/mxnet 6 | mkdir -p ./model/pretrained_model 7 | 8 | cd lib/bbox 9 | python setup_linux.py build_ext --inplace 10 | cd ../dataset/pycocotools 11 | python setup_linux.py build_ext --inplace 12 | cd ../../nms 13 | python setup_linux.py build_ext --inplace 14 | cd ../.. 
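# usage, from the repo root: bash init.sh # assumes a Python 2 environment with Cython, numpy and the CUDA toolchain on PATH; # the three build_ext --inplace calls above compile the bbox, pycocotools and # nms extensions next to their sources so the modules under lib/ can import them directly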
15 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Sergey Karayev 7 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | cimport cython 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | DTYPE = np.float 15 | ctypedef np.float_t DTYPE_t 16 | 17 | def bbox_overlaps_cython( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | """ 11 | This file has functions about generating bounding box regression targets 12 | """ 13 | 14 | import numpy as np 15 | 16 | from bbox_transform import bbox_overlaps, bbox_transform 17 | 18 | 19 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 20 | """ 21 | given rois, overlaps, gt labels, compute bounding box regression targets 22 | :param rois: roidb[i]['boxes'] k * 4 23 | :param overlaps: roidb[i]['max_overlaps'] k * 1 24 | :param labels: roidb[i]['max_classes'] k * 1 25 | :return: targets[i][class, dx, dy, dw, dh] k * 5 26 | """ 27 | # Ensure ROIs are floats 28 | rois = rois.astype(np.float, copy=False) 29 | 30 | # Sanity check 31 | if len(rois) != len(overlaps): 32 | print 'bbox regression: this should not happen' 33 | 34 | # Indices of ground-truth ROIs 35 | gt_inds = np.where(overlaps == 1)[0] 36 | if len(gt_inds) == 0: 37 | print 'something wrong : zero ground truth rois' 38 | # Indices of examples for which we try to make predictions 39 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 40 | 41 | # Get IoU overlap between each ex ROI and gt ROI 42 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 43 | 44 | # Find which gt ROI each ex ROI has max overlap with: 45 | # this will be the ex ROI's gt target 46 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 47 | gt_rois = rois[gt_inds[gt_assignment], :] 48 | ex_rois = rois[ex_inds, :] 49 | 50 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 51 | targets[ex_inds, 0] = labels[ex_inds] 52 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 53 | return targets 54 | 55 | 56 | def add_bbox_regression_targets(roidb, cfg): 57 | """ 58 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 59 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 60 | :return: means, standard deviations of targets 61 | """ 62 | print 'add bounding box regression targets' 63 | assert len(roidb) > 0 64 | assert 'max_classes' in roidb[0] 65 | 66 | num_images = len(roidb) 67 | num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1] 68 | 69 | for im_i in range(num_images): 70 | rois = roidb[im_i]['boxes'] 71 | max_overlaps = roidb[im_i]['max_overlaps'] 72 | max_classes = roidb[im_i]['max_classes'] 73 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg) 74 | 75 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 76 | # use fixed / precomputed means and stds instead of empirical values 77 | means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1)) 78 | stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1)) 79 | else: 80 | # compute mean, std values 81 | class_counts = np.zeros((num_classes, 1)) + 1e-14 82 | sums = np.zeros((num_classes, 4)) 83 | squared_sums = np.zeros((num_classes, 4)) 84 | for im_i in range(num_images): 85 | targets = roidb[im_i]['bbox_targets'] 86 | for cls in range(1, num_classes): 87 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 88 | if cls_indexes.size > 0: 89 | class_counts[cls] += cls_indexes.size 90 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 91 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 92 | 93 | means = sums / class_counts 94 | # var(x) = E(x^2) - E(x)^2 95 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 96 | 97 | print 'bbox target means:' 98 | print means 99 | print means[1:, :].mean(axis=0) # ignore bg class 100 | print 'bbox target stdevs:' 101 | print stds 102 | print stds[1:, :].mean(axis=0) # ignore bg class 103 | 104 | 105 | # normalize targets 106 | for im_i in range(num_images): 107 | targets = roidb[im_i]['bbox_targets'] 108 | for cls in range(1, num_classes): 109 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 110 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 111 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 112 | 113 | return means.ravel(), stds.ravel() 114 | 115 | 116 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg): 117 | """ 118 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 119 | :param bbox_targets_data: [k * 5] 120 | :param num_classes: number of classes 121 | :return: bbox target processed [k, 4 * num_classes] 122 | bbox_weights: only foreground boxes have non-zero bbox regression weights
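example (illustrative, with cfg.CLASS_AGNOSTIC false and num_classes = 3): a row [2, dx, dy, dw, dh] fills bbox_targets[i, 8:12] with [dx, dy, dw, dh] and bbox_weights[i, 8:12] with cfg.TRAIN.BBOX_WEIGHTS; every other column stays zero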
123 | """ 124 | classes = bbox_targets_data[:, 0] 125 | if cfg.CLASS_AGNOSTIC: 126 | num_classes = 2 127 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 128 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 129 | indexes = np.where(classes > 0)[0] 130 | for index in indexes: 131 | cls = classes[index] 132 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 133 | end = start + 4 134 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 135 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 136 | return bbox_targets, bbox_weights 137 | 138 | -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from bbox import bbox_overlaps_cython 3 | 4 | 5 | def bbox_overlaps(boxes, query_boxes): 6 | return bbox_overlaps_cython(boxes, query_boxes) 7 | 8 | 9 | def bbox_overlaps_py(boxes, query_boxes): 10 | """ 11 | determine overlaps between boxes and query_boxes 12 | :param boxes: n * 4 bounding boxes 13 | :param query_boxes: k * 4 bounding boxes 14 | :return: overlaps: n * k overlaps 15 | """ 16 | n_ = boxes.shape[0] 17 | k_ = query_boxes.shape[0] 18 | overlaps = np.zeros((n_, k_), dtype=np.float) 19 | for k in range(k_): 20 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 23 | if iw > 0: 24 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 25 | if ih > 0: 26 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 27 | all_area = float(box_area + query_box_area - iw * ih) 28 | overlaps[n, k] = iw * ih / all_area 29 | return overlaps 30 | 31 | 32 | def clip_boxes(boxes, im_shape): 33 | """ 34 | Clip boxes to image boundaries. 35 | :param boxes: [N, 4* num_classes] 36 | :param im_shape: tuple of 2 37 | :return: [N, 4* num_classes] 38 | """ 39 | # x1 >= 0 40 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 41 | # y1 >= 0 42 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 43 | # x2 < im_shape[1] 44 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 45 | # y2 < im_shape[0] 46 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 47 | return boxes 48 | 49 | def filter_boxes(boxes, min_size): 50 | """ 51 | filter small boxes. 
52 | :param boxes: [N, 4* num_classes] 53 | :param min_size: 54 | :return: keep: 55 | """ 56 | ws = boxes[:, 2] - boxes[:, 0] + 1 57 | hs = boxes[:, 3] - boxes[:, 1] + 1 58 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 59 | return keep 60 | 61 | def nonlinear_transform(ex_rois, gt_rois): 62 | """ 63 | compute bounding box regression targets from ex_rois to gt_rois 64 | :param ex_rois: [N, 4] 65 | :param gt_rois: [N, 4] 66 | :return: [N, 4] 67 | """ 68 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 69 | 70 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 71 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 72 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 73 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 74 | 75 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 76 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 77 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 78 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 79 | 80 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 81 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 82 | targets_dw = np.log(gt_widths / ex_widths) 83 | targets_dh = np.log(gt_heights / ex_heights) 84 | 85 | targets = np.vstack( 86 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 87 | return targets 88 | 89 | 90 | def nonlinear_pred(boxes, box_deltas): 91 | """ 92 | Transform the set of class-agnostic boxes into class-specific boxes 93 | by applying the predicted offsets (box_deltas) 94 | :param boxes: !important [N 4] 95 | :param box_deltas: [N, 4 * num_classes] 96 | :return: [N 4 * num_classes] 97 | """ 98 | if boxes.shape[0] == 0: 99 | return np.zeros((0, box_deltas.shape[1])) 100 | 101 | boxes = boxes.astype(np.float, copy=False) 102 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 103 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 104 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 105 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 106 | 107 | dx = box_deltas[:, 0::4] 108 | dy = box_deltas[:, 1::4] 109 | dw = box_deltas[:, 2::4] 110 | dh = box_deltas[:, 3::4] 111 | 112 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 113 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 114 | pred_w = np.exp(dw) * widths[:, np.newaxis] 115 | pred_h = np.exp(dh) * heights[:, np.newaxis] 116 | 117 | pred_boxes = np.zeros(box_deltas.shape) 118 | # x1 119 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 120 | # y1 121 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 122 | # x2 123 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 124 | # y2 125 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 126 | 127 | return pred_boxes 128 | 129 | 130 | def iou_transform(ex_rois, gt_rois): 131 | """ return bbox targets, IoU loss uses gt_rois as gt """ 132 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 133 | return gt_rois 134 | 135 | 136 | def iou_pred(boxes, box_deltas): 137 | """ 138 | Transform the set of class-agnostic boxes into class-specific boxes 139 | by applying the predicted offsets (box_deltas) 140 | :param boxes: !important [N 4] 141 | :param box_deltas: [N, 4 * num_classes] 142 | :return: [N 4 * num_classes] 143 | """ 144 | if boxes.shape[0] == 0: 145 | return np.zeros((0, box_deltas.shape[1])) 146 | 147 | boxes = boxes.astype(np.float, copy=False) 148 | x1 = boxes[:, 0] 149 | y1 = boxes[:, 1] 150 | x2 = boxes[:, 2] 151 | y2 = boxes[:, 3] 152 | 153 | dx1 = box_deltas[:, 0::4] 154 | dy1 = box_deltas[:, 1::4] 
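# (the strided slices 0::4 ... 3::4 pick the x1/y1/x2/y2 delta columns for every class at once, matching the [N, 4 * num_classes] layout)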
155 | dx2 = box_deltas[:, 2::4] 156 | dy2 = box_deltas[:, 3::4] 157 | 158 | pred_boxes = np.zeros(box_deltas.shape) 159 | # x1 160 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 161 | # y1 162 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 163 | # x2 164 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 165 | # y2 166 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 167 | 168 | return pred_boxes 169 | 170 | 171 | # define bbox_transform and bbox_pred 172 | bbox_transform = nonlinear_transform 173 | bbox_pred = nonlinear_pred 174 | -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | import os 11 | from os.path import join as pjoin 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import numpy as np 16 | 17 | # Obtain the numpy include directory. This logic works across numpy versions. 18 | try: 19 | numpy_include = np.get_include() 20 | except AttributeError: 21 | numpy_include = np.get_numpy_include() 22 | 23 | 24 | def customize_compiler_for_nvcc(self): 25 | """inject deep into distutils to customize how the dispatch 26 | to gcc/nvcc works. 27 | If you subclass UnixCCompiler, it's not trivial to get your subclass 28 | injected in, and still have the right customizations (i.e. 29 | distutils.sysconfig.customize_compiler) run on it. So instead of going 30 | the OO route, I have this. Note, it's kind of like a weird functional 31 | subclassing going on.""" 32 | 33 | # tell the compiler it can process .cu 34 | self.src_extensions.append('.cu') 35 | 36 | # save references to the default compiler_so and _compile methods 37 | default_compiler_so = self.compiler_so 38 | super = self._compile 39 | 40 | # now redefine the _compile method. This gets executed for each 41 | # object but distutils doesn't have the ability to change compilers 42 | # based on source extension: we add it.
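# note: unlike lib/nms/setup_linux.py, this file never defines CUDA, so the '.cu' branch below is dead code for bbox.pyx; only the 'gcc' post-args are ever used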
43 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 44 | if os.path.splitext(src)[1] == '.cu': 45 | # use the cuda for .cu files 46 | self.set_executable('compiler_so', CUDA['nvcc']) 47 | # use only a subset of the extra_postargs, which are 1-1 translated 48 | # from the extra_compile_args in the Extension class 49 | postargs = extra_postargs['nvcc'] 50 | else: 51 | postargs = extra_postargs['gcc'] 52 | 53 | super(obj, src, ext, cc_args, postargs, pp_opts) 54 | # reset the default compiler_so, which we might have changed for cuda 55 | self.compiler_so = default_compiler_so 56 | 57 | # inject our redefined _compile method into the class 58 | self._compile = _compile 59 | 60 | 61 | # run the customize_compiler 62 | class custom_build_ext(build_ext): 63 | def build_extensions(self): 64 | customize_compiler_for_nvcc(self.compiler) 65 | build_ext.build_extensions(self) 66 | 67 | 68 | ext_modules = [ 69 | Extension( 70 | "bbox", 71 | ["bbox.pyx"], 72 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 73 | include_dirs=[numpy_include] 74 | ), 75 | ] 76 | 77 | setup( 78 | name='bbox_cython', 79 | ext_modules=ext_modules, 80 | # inject our custom trigger 81 | cmdclass={'build_ext': custom_build_ext}, 82 | ) 83 | -------------------------------------------------------------------------------- /lib/bbox/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import os 11 | from os.path import join as pjoin 12 | #from distutils.core import setup 13 | from setuptools import setup 14 | from distutils.extension import Extension 15 | from Cython.Distutils import build_ext 16 | import subprocess 17 | 18 | #change for windows, by MrX 19 | nvcc_bin = 'nvcc.exe' 20 | lib_dir = 'lib/x64' 21 | 22 | import distutils.msvc9compiler 23 | distutils.msvc9compiler.VERSION = 14.0 24 | 25 | # Obtain the numpy include directory. This logic works across numpy versions. 
26 | try: 27 | numpy_include = np.get_include() 28 | except AttributeError: 29 | numpy_include = np.get_numpy_include() 30 | 31 | ext_modules = [ 32 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 33 | Extension( 34 | "bbox", 35 | sources=["bbox.pyx"], 36 | extra_compile_args={}, 37 | include_dirs = [numpy_include] 38 | ), 39 | ] 40 | 41 | setup( 42 | name='fast_rcnn', 43 | ext_modules=ext_modules, 44 | # inject our custom trigger 45 | cmdclass={'build_ext': build_ext}, 46 | ) 47 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from imdb import IMDB 2 | from pascal_voc import PascalVOC 3 | from cityscape import CityScape 4 | from cityscape_video import CityScape_Video 5 | from coco import coco 6 | -------------------------------------------------------------------------------- /lib/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h > min_size))[0] 16 | return keep -------------------------------------------------------------------------------- /lib/dataset/pycocotools/.gitignore: -------------------------------------------------------------------------------- 1 | _mask.c 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import _mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). 
Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | #decode = _mask.decode 78 | def decode(rleObjs): 79 | if type(rleObjs) == list: 80 | return _mask.decode(rleObjs) 81 | else: 82 | return _mask.decode([rleObjs])[:,:,0] 83 | iou = _mask.iou 84 | merge = _mask.merge 85 | area = _mask.area 86 | toBbox = _mask.toBbox 87 | frPyObjects = _mask.frPyObjects 88 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_linux.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_windows.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | import distutils.msvc9compiler 7 | distutils.msvc9compiler.VERSION = 14.0 8 | 9 | 10 | # To compile and install locally run "python setup.py build_ext --inplace" 11 | # To install library to Python site-packages run "python setup.py build_ext install" 12 | 13 | ext_modules = [ 14 | Extension( 15 | '_mask', 16 | sources=['maskApi.c', '_mask.pyx'], 17 | include_dirs=[np.get_include()], 18 | extra_compile_args=[], 19 | ) 20 | ] 21 | 22 | setup(name='pycocotools', 23 | ext_modules=cythonize(ext_modules) 24 | ) 25 | -------------------------------------------------------------------------------- /lib/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/logger/__init__.py -------------------------------------------------------------------------------- /lib/logger/logger.py: -------------------------------------------------------------------------------- 1 | """ Logging values to various sinks """ 2 | 3 | class Logger(object): 4 | _fields = None 5 | 6 | @property 7 | def fields(self): 8 | assert self._fields is not None, "self.fields is not set!" 
9 | return self._fields 10 | 11 | @fields.setter 12 | def fields(self, value): 13 | self._fields = value 14 | 15 | def __init__(self, fields=None): 16 | """ Automatically logs the variables in 'fields' """ 17 | self.fields = fields 18 | 19 | def log(self, *args, **kwargs): 20 | pass 21 | 22 | def log_state(self, state_dict): 23 | pass -------------------------------------------------------------------------------- /lib/logger/readme.md: -------------------------------------------------------------------------------- 1 | This folder was copied from https://github.com/pytorch/tnt/blob/master/torchnet/logger -------------------------------------------------------------------------------- /lib/mask/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/mask/__init__.py -------------------------------------------------------------------------------- /lib/mask/mask_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Haozhi Qi, Yi Li, Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def intersect_box_mask(ex_box, gt_box, gt_mask): 12 | """ 13 | This function calculates the intersection of an external box 14 | and gt_box, and masks it according to gt_mask 15 | Args: 16 | ex_box: external ROIs 17 | gt_box: ground truth boxes 18 | gt_mask: ground truth masks, not been resized yet 19 | Returns: 20 | regression_target: logical numpy array 21 | """ 22 | x1 = max(ex_box[0], gt_box[0]) 23 | y1 = max(ex_box[1], gt_box[1]) 24 | x2 = min(ex_box[2], gt_box[2]) 25 | y2 = min(ex_box[3], gt_box[3]) 26 | if x1 > x2 or y1 > y2: 27 | return np.zeros((21, 21), dtype=bool) 28 | w = x2 - x1 + 1 29 | h = y2 - y1 + 1 30 | ex_starty = y1 - ex_box[1] 31 | ex_startx = x1 - ex_box[0] 32 | 33 | inter_maskb = gt_mask[y1:y2+1, x1:x2+1] 34 | regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1)) 35 | regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb 36 | 37 | return regression_target 38 | 39 | 40 | def mask_overlap(box1, box2, mask1, mask2): 41 | """ 42 | This function calculates the region IoU when the masks are 43 | inside different boxes 44 | Returns: 45 | intersection over union of these two masks 46 | """ 47 | x1 = max(box1[0], box2[0]) 48 | y1 = max(box1[1], box2[1]) 49 | x2 = min(box1[2], box2[2]) 50 | y2 = min(box1[3], box2[3]) 51 | if x1 > x2 or y1 > y2: 52 | return 0 53 | w = x2 - x1 + 1 54 | h = y2 - y1 + 1 55 | # get masks in the intersection part 56 | start_ya = y1 - box1[1] 57 | start_xa = x1 - box1[0] 58 | inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w] 59 | 60 | start_yb = y1 - box2[1] 61 | start_xb = x1 - box2[0] 62 | inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w] 63 | 64 | assert inter_maska.shape == inter_maskb.shape 65 | 66 | inter = np.logical_and(inter_maskb, inter_maska).sum() 67 | union = mask1.sum() + mask2.sum() - inter 68 | if union < 1.0: 69 | return 0 70 | return float(inter) / float(union) 71 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # 
-------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int32_t, ndim=1] \ 26 | order = scores.argsort()[::-1].astype(np.int32) 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cpu_nms import cpu_nms 4 | from gpu_nms import gpu_nms 5 | 6 | def py_nms_wrapper(thresh): 7 | def _nms(dets): 8 | return nms(dets, thresh) 9 | return _nms 10 | 11 | 12 | def cpu_nms_wrapper(thresh): 13 | def _nms(dets): 14 | return cpu_nms(dets, thresh) 15 | return _nms 16 | 17 | 18 | def gpu_nms_wrapper(thresh, device_id): 19 | def _nms(dets): 20 | return gpu_nms(dets, thresh, device_id) 21 | return _nms 22 | 23 | 24 | def nms(dets, thresh): 25 | """ 26 | greedily select boxes with high confidence and overlap with current maximum <= thresh 27 | rule out overlap >= thresh 28 | :param dets: [[x1, y1, x2, y2, score]] 29 | :param thresh: retain overlap < thresh 30 | :return: indexes to keep 31 | """ 32 | if dets.shape[0] == 0: 33 | return [] 34 | 35 | x1 = dets[:, 0] 36 | y1 = dets[:, 1] 37 | x2 = dets[:, 2] 38 | y2 = dets[:, 3] 39 | scores = dets[:, 4] 40 | 41 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 42 | order = scores.argsort()[::-1] 43 | 44 | keep = [] 45 | while order.size > 0: 46 | i = order[0] 47 | keep.append(i) 48 | xx1 = np.maximum(x1[i], x1[order[1:]]) 49 | yy1 = np.maximum(y1[i], y1[order[1:]]) 50 | xx2 = np.minimum(x2[i], x2[order[1:]]) 51 | yy2 = np.minimum(y2[i], y2[order[1:]]) 52 | 53 | w = np.maximum(0.0, xx2 - xx1 + 1) 54 | h = np.maximum(0.0, yy2 - yy1 + 1) 55 | inter = w * h 56 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 57 | 58 | inds = np.where(ovr <= thresh)[0] 59 | order = order[inds + 1] 60 | 61 | return keep 62 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deformable Convolutional Networks 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License 5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) 
((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH.
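Example (illustrative; the paths are machine-dependent): a typical return value is {'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc', 'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.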
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.iteritems(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc. The implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/nms/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import os 10 | from os.path import join as pjoin 11 | #from distutils.core import setup 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import subprocess 16 | 17 | #change for windows, by MrX 18 | nvcc_bin = 'nvcc.exe' 19 | lib_dir = 'lib/x64' 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | 25 | def find_in_path(name, path): 26 | "Find a file in a search path" 27 | # Adapted from 28 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 29 | for dir in path.split(os.pathsep): 30 | binpath = pjoin(dir, name) 31 | if os.path.exists(binpath): 32 | return os.path.abspath(binpath) 33 | return None 34 | 35 | 36 | def locate_cuda(): 37 | """Locate the CUDA environment on the system 38 | 39 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 40 | and values giving the absolute path to each directory. 41 | 42 | Starts by looking for the CUDA_PATH env variable.
If not found, everything 43 | is based on finding 'nvcc' in the PATH. 44 | """ 45 | 46 | # first check if the CUDA_PATH env variable is in use 47 | if 'CUDA_PATH' in os.environ: 48 | home = os.environ['CUDA_PATH'] 49 | print("home = %s\n" % home) 50 | nvcc = pjoin(home, 'bin', nvcc_bin) 51 | else: 52 | # otherwise, search the PATH for NVCC 53 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 54 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path) 55 | if nvcc is None: 56 | raise EnvironmentError('The nvcc binary could not be ' 57 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') 58 | home = os.path.dirname(os.path.dirname(nvcc)) 59 | print("home = %s, nvcc = %s\n" % (home, nvcc)) 60 | 61 | 62 | cudaconfig = {'home':home, 'nvcc':nvcc, 63 | 'include': pjoin(home, 'include'), 64 | 'lib64': pjoin(home, lib_dir)} 65 | for k, v in cudaconfig.iteritems(): 66 | if not os.path.exists(v): 67 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 68 | 69 | return cudaconfig 70 | CUDA = locate_cuda() 71 | 72 | 73 | # Obtain the numpy include directory. This logic works across numpy versions. 74 | try: 75 | numpy_include = np.get_include() 76 | except AttributeError: 77 | numpy_include = np.get_numpy_include() 78 | 79 | 80 | def customize_compiler_for_nvcc(self): 81 | """inject deep into distutils to customize how the dispatch 82 | to gcc/nvcc works. 83 | 84 | If you subclass UnixCCompiler, it's not trivial to get your subclass 85 | injected in, and still have the right customizations (i.e. 86 | distutils.sysconfig.customize_compiler) run on it. So instead of going 87 | the OO route, I have this. Note, it's kind of like a weird functional 88 | subclassing going on.""" 89 | 90 | # tell the compiler it can process .cu 91 | #self.src_extensions.append('.cu') 92 | 93 | 94 | # save references to the default compiler_so and _compile methods 95 | #default_compiler_so = self.spawn 96 | #default_compiler_so = self.rc 97 | super = self.compile 98 | 99 | # now redefine the _compile method. This gets executed for each 100 | # object but distutils doesn't have the ability to change compilers 101 | # based on source extension: we add it.
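# unlike the linux variant, MSVC exposes no per-object _compile hook, so the batch-level compile(sources, ...) method is wrapped here instead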
102 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 103 | postfix=os.path.splitext(sources[0])[1] 104 | 105 | if postfix == '.cu': 106 | # use the cuda for .cu files 107 | #self.set_executable('compiler_so', CUDA['nvcc']) 108 | # use only a subset of the extra_postargs, which are 1-1 translated 109 | # from the extra_compile_args in the Extension class 110 | postargs = extra_postargs['nvcc'] 111 | else: 112 | postargs = extra_postargs['gcc'] 113 | 114 | 115 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 116 | # reset the default compiler_so, which we might have changed for cuda 117 | #self.rc = default_compiler_so 118 | 119 | # inject our redefined _compile method into the class 120 | self.compile = compile 121 | 122 | 123 | # run the customize_compiler 124 | class custom_build_ext(build_ext): 125 | def build_extensions(self): 126 | customize_compiler_for_nvcc(self.compiler) 127 | build_ext.build_extensions(self) 128 | 129 | 130 | ext_modules = [ 131 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 132 | Extension( 133 | "cpu_nms", 134 | sources=["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': []}, 136 | include_dirs = [numpy_include], 137 | ), 138 | ] 139 | 140 | setup( 141 | name='fast_rcnn', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 9 | scales=2 ** np.arange(3, 6)): 10 | """ 11 | Generate anchor (reference) windows by enumerating aspect ratios X 12 | scales wrt a reference (0, 0, 15, 15) window. 13 | """ 14 | 15 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 16 | ratio_anchors = _ratio_enum(base_anchor, ratios) 17 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 18 | for i in xrange(ratio_anchors.shape[0])]) 19 | return anchors 20 | 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | 34 | def _mkanchors(ws, hs, x_ctr, y_ctr): 35 | """ 36 | Given a vector of widths (ws) and heights (hs) around a center 37 | (x_ctr, y_ctr), output a set of anchors (windows). 38 | """ 39 | 40 | ws = ws[:, np.newaxis] 41 | hs = hs[:, np.newaxis] 42 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 43 | y_ctr - 0.5 * (hs - 1), 44 | x_ctr + 0.5 * (ws - 1), 45 | y_ctr + 0.5 * (hs - 1))) 46 | return anchors 47 | 48 | 49 | def _ratio_enum(anchor, ratios): 50 | """ 51 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
52 | """ 53 | 54 | w, h, x_ctr, y_ctr = _whctrs(anchor) 55 | size = w * h 56 | size_ratios = size / ratios 57 | ws = np.round(np.sqrt(size_ratios)) 58 | hs = np.round(ws * ratios) 59 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 60 | return anchors 61 | 62 | 63 | def _scale_enum(anchor, scales): 64 | """ 65 | Enumerate a set of anchors for each scale wrt an anchor. 66 | """ 67 | 68 | w, h, x_ctr, y_ctr = _whctrs(anchor) 69 | ws = w * scales 70 | hs = h * scales 71 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 72 | return anchors 73 | -------------------------------------------------------------------------------- /lib/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/segmentation/__init__.py -------------------------------------------------------------------------------- /lib/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | """ 12 | Segmentation: 13 | data = 14 | {'data': [num_images, c, h, w], 15 | 'im_info': [num_images, 4] (optional)} 16 | label = 17 | {'label': [batch_size, 1] <- [batch_size, c, h, w]} 18 | """ 19 | 20 | import numpy as np 21 | from utils.image import get_segmentation_image, tensor_vstack 22 | from utils.image import get_segmentation_image_video 23 | 24 | def get_segmentation_test_batch(segdb, config): 25 | """ 26 | return a dict of train batch 27 | :param segdb: ['image', 'flipped'] 28 | :param config: the config setting 29 | :return: data, label, im_info 30 | """ 31 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 32 | im_array = imgs 33 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 34 | 35 | data = [{'data': im_array[i], 36 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 37 | label = [{'label':seg_cls_gts[i]} for i in xrange(len(segdb))] 38 | 39 | return data, label, im_info 40 | 41 | def get_segmentation_train_batch(segdb, config): 42 | """ 43 | return a dict of train batch 44 | :param segdb: ['image', 'flipped'] 45 | :param config: the config setting 46 | :return: data, label, im_info 47 | """ 48 | # assert len(segdb) == 1, 'Single batch only' 49 | assert len(segdb) == 1, 'Single batch only' 50 | 51 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 52 | im_array = imgs[0] 53 | seg_cls_gt = seg_cls_gts[0] 54 | 55 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 56 | 57 | data = {'data': im_array, 58 | 'im_info': im_info} 59 | label = {'label': seg_cls_gt} 60 | 61 | return data, label 62 | 63 | def get_segmentation_test_batch_video(segdb, config): 64 | """ 65 | return a dict of train batch 66 | :param segdb: ['image', 'flipped'] 67 | :param config: the config setting 68 | :return: data, label, im_info 69 | """ 70 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config,is_train=False) 71 | im_array = imgs 72 | ref_im_array = ref_imgs 73 | eq_flag_array = eq_flags 74 | 75 | im_info = 
-------------------------------------------------------------------------------- /lib/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/segmentation/__init__.py -------------------------------------------------------------------------------- /lib/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | """ 12 | Segmentation: 13 | data = 14 | {'data': [num_images, c, h, w], 15 | 'im_info': [num_images, 4] (optional)} 16 | label = 17 | {'label': [batch_size, 1] <- [batch_size, c, h, w]} 18 | """ 19 | 20 | import numpy as np 21 | from utils.image import get_segmentation_image, tensor_vstack 22 | from utils.image import get_segmentation_image_video 23 | 24 | def get_segmentation_test_batch(segdb, config): 25 | """ 26 | return a dict of test batch 27 | :param segdb: ['image', 'flipped'] 28 | :param config: the config setting 29 | :return: data, label, im_info 30 | """ 31 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 32 | im_array = imgs 33 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 34 | 35 | data = [{'data': im_array[i], 36 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 37 | label = [{'label':seg_cls_gts[i]} for i in xrange(len(segdb))] 38 | 39 | return data, label, im_info 40 | 41 | def get_segmentation_train_batch(segdb, config): 42 | """ 43 | return a dict of train batch 44 | :param segdb: ['image', 'flipped'] 45 | :param config: the config setting 46 | :return: data, label, im_info 47 | """ 48 | # assert len(segdb) == 1, 'Single batch only' 49 | assert len(segdb) == 1, 'Single batch only' 50 | 51 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 52 | im_array = imgs[0] 53 | seg_cls_gt = seg_cls_gts[0] 54 | 55 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 56 | 57 | data = {'data': im_array, 58 | 'im_info': im_info} 59 | label = {'label': seg_cls_gt} 60 | 61 | return data, label 62 | 63 | def get_segmentation_test_batch_video(segdb, config): 64 | """ 65 | return a dict of test batch 66 | :param segdb: ['image', 'flipped'] 67 | :param config: the config setting 68 | :return: data, label, im_info 69 | """ 70 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config, is_train=False) 71 | im_array = imgs 72 | ref_im_array = ref_imgs 73 | eq_flag_array = eq_flags 74 | 75 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 76 | eq_flag_array = [np.array([eq_flags[i],], dtype=np.float32) for i in xrange(len(segdb))] 77 | 78 | data = [{'data':im_array[i], 79 | 'data_ref': ref_im_array[i], 80 | 'eq_flag': eq_flag_array[i], 81 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 82 | label = [{'label': seg_cls_gts[i]} for i in xrange(len(segdb))] 83 | 84 | return data, label, im_info 85 | 86 | def get_segmentation_train_batch_video(segdb, config): 87 | """ 88 | return a dict of train batch 89 | :param segdb: ['image', 'flipped'] 90 | :param config: the config setting 91 | :return: data, label, im_info 92 | """ 93 | # assert len(segdb) == 1, 'Single batch only' 94 | assert len(segdb) == 1, 'Single batch only' 95 | imgs, ref_imgs, eq_flags, seg_cls_gts, segdb = get_segmentation_image_video(segdb, config) 96 | im_array = imgs[0] 97 | ref_im_array = ref_imgs[0] 98 | eq_flag_array = np.array([eq_flags[0],], dtype=np.float32) 99 | seg_cls_gt = seg_cls_gts[0] 100 | 101 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 102 | 103 | data = {'data': im_array, 104 | 'data_ref': ref_im_array, 105 | 'eq_flag': eq_flag_array, 106 | 'im_info': im_info} 107 | label = {'label': seg_cls_gt} 108 | return data, label 109 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | 10 | import mxnet as mx 11 | from mxnet.io import DataDesc, DataBatch 12 | import threading 13 | 14 | 15 | class PrefetchingIter(mx.io.DataIter): 16 | """Base class for prefetching iterators. Takes one or more DataIters ( 17 | or any class with "reset" and "next" methods) and combines them with 18 | prefetching. 19 | 20 | Parameters 21 | ---------- 22 | iters : DataIter or list of DataIter 23 | one or more DataIters (or any class with "reset" and "next" methods) 24 | rename_data : None or list of dict 25 | i-th element is a renaming map for i-th iter, in the form of 26 | {'original_name' : 'new_name'}.
Should have one entry for each entry 27 | in iter[i].provide_data 28 | rename_label : None or list of dict 29 | Similar to rename_data 30 | 31 | Examples 32 | -------- 33 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 34 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 35 | """ 36 | def __init__(self, iters, rename_data=None, rename_label=None): 37 | super(PrefetchingIter, self).__init__() 38 | if not isinstance(iters, list): 39 | iters = [iters] 40 | self.n_iter = len(iters) 41 | assert self.n_iter ==1, "Our prefetching iter only support 1 DataIter" 42 | self.iters = iters 43 | self.rename_data = rename_data 44 | self.rename_label = rename_label 45 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 46 | self.data_ready = [threading.Event() for i in range(self.n_iter)] 47 | self.data_taken = [threading.Event() for i in range(self.n_iter)] 48 | for e in self.data_taken: 49 | e.set() 50 | self.started = True 51 | self.current_batch = [None for _ in range(self.n_iter)] 52 | self.next_batch = [None for _ in range(self.n_iter)] 53 | def prefetch_func(self, i): 54 | """Thread entry""" 55 | while True: 56 | self.data_taken[i].wait() 57 | if not self.started: 58 | break 59 | try: 60 | self.next_batch[i] = self.iters[i].next() 61 | except StopIteration: 62 | self.next_batch[i] = None 63 | self.data_taken[i].clear() 64 | self.data_ready[i].set() 65 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 66 | for i in range(self.n_iter)] 67 | for thread in self.prefetch_threads: 68 | thread.setDaemon(True) 69 | thread.start() 70 | 71 | def __del__(self): 72 | self.started = False 73 | for e in self.data_taken: 74 | e.set() 75 | for thread in self.prefetch_threads: 76 | thread.join() 77 | 78 | @property 79 | def provide_data(self): 80 | """The name and shape of data provided by this iterator""" 81 | if self.rename_data is None: 82 | return sum([i.provide_data for i in self.iters], []) 83 | else: 84 | return sum([[ 85 | DataDesc(r[x.name], x.shape, x.dtype) 86 | if isinstance(x, DataDesc) else DataDesc(*x) 87 | for x in i.provide_data 88 | ] for r, i in zip(self.rename_data, self.iters)], []) 89 | 90 | @property 91 | def provide_label(self): 92 | """The name and shape of label provided by this iterator""" 93 | if self.rename_label is None: 94 | return sum([i.provide_label for i in self.iters], []) 95 | else: 96 | return sum([[ 97 | DataDesc(r[x.name], x.shape, x.dtype) 98 | if isinstance(x, DataDesc) else DataDesc(*x) 99 | for x in i.provide_label 100 | ] for r, i in zip(self.rename_label, self.iters)], []) 101 | 102 | def reset(self): 103 | for e in self.data_ready: 104 | e.wait() 105 | for i in self.iters: 106 | i.reset() 107 | for e in self.data_ready: 108 | e.clear() 109 | for e in self.data_taken: 110 | e.set() 111 | 112 | def iter_next(self): 113 | for e in self.data_ready: 114 | e.wait() 115 | if self.next_batch[0] is None: 116 | return False 117 | else: 118 | self.current_batch = self.next_batch[0] 119 | for e in self.data_ready: 120 | e.clear() 121 | for e in self.data_taken: 122 | e.set() 123 | return True 124 | 125 | def next(self): 126 | if self.iter_next(): 127 | return self.current_batch 128 | else: 129 | raise StopIteration 130 | 131 | def getdata(self): 132 | return self.current_batch.data 133 | 134 | def getlabel(self): 135 | return self.current_batch.label 136 | 137 | def getindex(self): 138 | return self.current_batch.index 139 | 140 | def getpad(self): 141 | return self.current_batch.pad 142 | 
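Note: the data_taken/data_ready events implement a double buffer between the consumer and one worker thread per iter: the consumer sets data_taken so the worker may fetch batch i+1 while batch i is still in use, and the worker sets data_ready once the batch has arrived. A minimal usage sketch (train_iter stands for one of this repo's loaders, e.g. from deeplab/core/loader.py, whose provide_data is nested per device, which is what the batch_size line above expects):

    prefetched = PrefetchingIter(train_iter)
    for batch in prefetched:   # batch i+1 loads while batch i is consumed
        pass                   # forward/backward would go here
    prefetched.reset()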
-------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonysy/Deep-Feature-Flow-Segmentation/4372bd417ac1d0815566c87fd612b5ed08f99d49/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | from load_model import load_checkpoint 12 | from save_model import save_checkpoint 13 | 14 | 15 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 16 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 17 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 18 | arg_names = args1.keys() + args2.keys() 19 | aux_names = auxs1.keys() + auxs2.keys() 20 | args = dict() 21 | for arg in arg_names: 22 | if arg in args1: 23 | args[arg] = args1[arg] 24 | if arg in args2: 25 | args[arg] = args2[arg] 26 | auxs = dict() 27 | for aux in aux_names: 28 | if aux in auxs1: 29 | auxs[aux] = auxs1[aux] 30 | if aux in auxs2: 31 | auxs[aux] = auxs2[aux] 32 | save_checkpoint(prefix_out, epoch_out, args, auxs) 33 | -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Bin Xiao 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import logging 13 | import time 14 | 15 | def create_logger(root_output_path, cfg, image_set): 16 | """Create logger and get the output path 17 | 18 | Args: 19 | root_output_path: 20 | root folder that all outputs are written under 21 | cfg: 22 | path of the yaml configuration file 23 | image_set: 24 | '+'-separated image set name(s), e.g. 'train+val' 25 | 26 | Return: 27 | logger 28 | 29 | final output folder path 30 | """ 31 | # set up logger 32 | if not os.path.exists(root_output_path): 33 | os.makedirs(root_output_path) 34 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 35 | 36 | cfg_name = os.path.basename(cfg).split('.')[0] 37 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 38 | if not os.path.exists(config_output_path): 39 | os.makedirs(config_output_path) 40 | 41 | image_sets = [iset for iset in image_set.split('+')] 42 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 43 | if not os.path.exists(final_output_path): 44 | os.makedirs(final_output_path) 45 | 46 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 47 | head = '%(asctime)-15s %(message)s' 48 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 49 | logger = logging.getLogger() 50 |
logger.setLevel(logging.INFO) 51 | 52 | return logger, final_output_path 53 | 54 | -------------------------------------------------------------------------------- /lib/utils/image_processing.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def resize(im, target_size, max_size): 16 | """ 17 | only resize input image to target size and return scale 18 | :param im: BGR image input by opencv 19 | :param target_size: one dimensional size (the short side) 20 | :param max_size: one dimensional max size (the long side) 21 | :return: resized im and the scale that was applied 22 | """ 23 | im_shape = im.shape 24 | im_size_min = np.min(im_shape[0:2]) 25 | im_size_max = np.max(im_shape[0:2]) 26 | im_scale = float(target_size) / float(im_size_min) 27 | # prevent bigger axis from being more than max_size: 28 | if np.round(im_scale * im_size_max) > max_size: 29 | im_scale = float(max_size) / float(im_size_max) 30 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 31 | return im, im_scale 32 | 33 | 34 | def transform(im, pixel_means, need_mean=False): 35 | """ 36 | transform into mxnet tensor 37 | subtract the pixel means and transpose to the correct format 38 | :param im: [height, width, channel] in BGR 39 | :param pixel_means: [[[R, G, B pixel means]]] 40 | :return: [batch, channel, height, width] 41 | """ 42 | assert False, "shouldn't reach here."
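# NOTE: the assert above makes the remainder of this helper unreachable; the
# module appears to be superseded by utils/image.py, which is what the rest
# of the code imports (see lib/segmentation/segmentation.py).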
43 | im = im.copy() 44 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] 45 | im = im.astype(float) 46 | if need_mean: 47 | im -= pixel_means 48 | im_tensor = im[np.newaxis, :] 49 | # put channel first 50 | channel_swap = (0, 3, 1, 2) 51 | im_tensor = im_tensor.transpose(channel_swap) 52 | return im_tensor 53 | 54 | 55 | def transform_inverse(im_tensor, pixel_means): 56 | """ 57 | transform from mxnet im_tensor to ordinary RGB image 58 | im_tensor is limited to one image 59 | :param im_tensor: [batch, channel, height, width] 60 | :param pixel_means: [[[R, G, B pixel means]]] 61 | :return: im [height, width, channel(RGB)] 62 | """ 63 | assert im_tensor.shape[0] == 1 64 | im_tensor = im_tensor.copy() 65 | # put channel back 66 | channel_swap = (0, 2, 3, 1) 67 | im_tensor = im_tensor.transpose(channel_swap) 68 | im = im_tensor[0] 69 | assert im.shape[2] == 3 70 | im += pixel_means 71 | im = im.astype(np.uint8) 72 | return im 73 | 74 | 75 | def tensor_vstack(tensor_list, pad=0): 76 | """ 77 | vertically stack tensors 78 | :param tensor_list: list of tensor to be stacked vertically 79 | :param pad: label to pad with 80 | :return: tensor with max shape 81 | """ 82 | ndim = len(tensor_list[0].shape) 83 | if ndim == 1: 84 | return np.hstack(tensor_list) 85 | dimensions = [0] 86 | for dim in range(1, ndim): 87 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 88 | for ind, tensor in enumerate(tensor_list): 89 | pad_shape = [(0, 0)] 90 | for dim in range(1, ndim): 91 | pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) 92 | tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) 93 | all_tensor = np.vstack(tensor_list) 94 | return all_tensor 95 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import numpy as np 12 | from dataset import * 13 | 14 | 15 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 16 | flip=False): 17 | """ load ground truth roidb """ 18 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 19 | roidb = imdb.gt_roidb() 20 | if flip: 21 | roidb = imdb.append_flipped_images(roidb) 22 | return roidb 23 | 24 | 25 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 26 | proposal='rpn', append_gt=True, flip=False): 27 | """ load proposal roidb (append_gt when training) """ 28 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 29 | 30 | gt_roidb = imdb.gt_roidb() 31 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb, append_gt) 32 | if flip: 33 | roidb = imdb.append_flipped_images(roidb) 34 | return roidb 35 | 36 | 37 | def merge_roidb(roidbs): 38 | """ roidb are list, concat them together """ 39 | roidb = roidbs[0] 40 | for r in roidbs[1:]: 41 | roidb.extend(r) 42 | return roidb 43 | 44 | 45 | def filter_roidb(roidb, config): 46 | """ remove roidb entries without usable rois """ 47 | 48 | def is_valid(entry): 49 | """ valid images have at least 1 fg or bg roi """ 50 | overlaps = entry['max_overlaps'] 51 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 52 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 53 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 54 | return valid 55 | 56 | num = len(roidb) 57 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 58 | num_after = len(filtered_roidb) 59 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 60 | 61 | return filtered_roidb 62 | 63 | 64 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 65 | flip=False, video=False): 66 | """ load ground truth segdb """ 67 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 68 | segdb = imdb.gt_segdb() 69 | if flip: 70 | segdb = imdb.append_flipped_images_for_segmentation(segdb, video=video) 71 | return segdb 72 | 73 | 74 | def merge_segdb(segdbs): 75 | """ segdb are list, concat them together """ 76 | segdb = segdbs[0] 77 | for r in segdbs[1:]: 78 | segdb.extend(r) 79 | return segdb 80 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | 13 | 14 | def load_checkpoint(prefix, epoch): 15 | """ 16 | Load model checkpoint from file. 17 | :param prefix: Prefix of model name. 18 | :param epoch: Epoch number of model we would like to load. 19 | :return: (arg_params, aux_params) 20 | arg_params : dict of str to NDArray 21 | Model parameter, dict of name to NDArray of net's weights. 22 | aux_params : dict of str to NDArray 23 | Model parameter, dict of name to NDArray of net's auxiliary states. 24 | """ 25 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 26 | arg_params = {} 27 | aux_params = {} 28 | for k, v in save_dict.items(): 29 | tp, name = k.split(':', 1) 30 | if tp == 'arg': 31 | arg_params[name] = v 32 | if tp == 'aux': 33 | aux_params[name] = v 34 | return arg_params, aux_params 35 | 36 | 37 | def convert_context(params, ctx): 38 | """ 39 | :param params: dict of str to NDArray 40 | :param ctx: the context to convert to 41 | :return: dict of str of NDArray with context ctx 42 | """ 43 | new_params = dict() 44 | for k, v in params.items(): 45 | new_params[k] = v.as_in_context(ctx) 46 | return new_params 47 | 48 | 49 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 50 | """ 51 | wrapper for load checkpoint 52 | :param prefix: Prefix of model name. 
53 | :param epoch: Epoch number of model we would like to load. 54 | :param convert: reference model should be converted to GPU NDArray first 55 | :param ctx: if convert then ctx must be designated. 56 | :param process: model should drop any test 57 | :return: (arg_params, aux_params) 58 | """ 59 | arg_params, aux_params = load_checkpoint(prefix, epoch) 60 | if convert: 61 | if ctx is None: 62 | ctx = mx.cpu() 63 | arg_params = convert_context(arg_params, ctx) 64 | aux_params = convert_context(aux_params, ctx) 65 | if process: 66 | tests = [k for k in arg_params.keys() if '_test' in k] 67 | for test in tests: 68 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 69 | return arg_params, aux_params 70 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-main: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | 12 | import logging 13 | from mxnet.lr_scheduler import LRScheduler 14 | 15 | class WarmupMultiFactorScheduler(LRScheduler): 16 | """Reduce learning rate in factor at steps specified in a list 17 | 18 | Assume the weight has been updated by n times, then the learning rate will 19 | be 20 | 21 | base_lr * factor^(sum((step/n)<=1)) # step is an array 22 | 23 | Parameters 24 | ---------- 25 | step: list of int 26 | schedule learning rate after n updates 27 | factor: float 28 | the factor for reducing the learning rate 29 | """ 30 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 31 | super(WarmupMultiFactorScheduler, self).__init__() 32 | assert isinstance(step, list) and len(step) >= 1 33 | for i, _step in enumerate(step): 34 | if i != 0 and step[i] <= step[i-1]: 35 | raise ValueError("Schedule step must be an increasing integer list") 36 | if _step < 1: 37 | raise ValueError("Schedule step must be greater or equal than 1 round") 38 | if factor > 1.0: 39 | raise ValueError("Factor must be no more than 1 to make lr reduce") 40 | self.step = step 41 | self.cur_step_ind = 0 42 | self.factor = factor 43 | self.count = 0 44 | self.warmup = warmup 45 | self.warmup_lr = warmup_lr 46 | self.warmup_step = warmup_step 47 | 48 | def __call__(self, num_update): 49 | """ 50 | Call to schedule current learning rate 51 | 52 | Parameters 53 | ---------- 54 | num_update: int 55 | the maximal number of updates applied to a weight. 
56 | """ 57 | 58 | # NOTE: use while rather than if (for continuing training via load_epoch) 59 | if self.warmup and num_update < self.warmup_step: 60 | return self.warmup_lr 61 | while self.cur_step_ind <= len(self.step)-1: 62 | if num_update > self.step[self.cur_step_ind]: 63 | self.count = self.step[self.cur_step_ind] 64 | self.cur_step_ind += 1 65 | self.base_lr *= self.factor 66 | logging.info("Update[%d]: Change learning rate to %0.5e", 67 | num_update, self.base_lr) 68 | else: 69 | return self.base_lr 70 | return self.base_lr 71 | -------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | 11 | def segToMask( S, h, w ): 12 | """ 13 | Convert polygon segmentation to binary mask. 14 | :param S (float array) : polygon segmentation mask 15 | :param h (int) : target mask height 16 | :param w (int) : target mask width 17 | :return: M (bool 2D array) : binary mask 18 | """ 19 | M = np.zeros((h,w), dtype=np.bool) 20 | for s in S: 21 | N = len(s) 22 | rr, cc = polygon(np.array(s[1:N:2]).clip(max=h-1), \ 23 | np.array(s[0:N:2]).clip(max=w-1)) # (y, x) 24 | M[rr, cc] = 1 25 | return M 26 | 27 | 28 | def decodeMask(R): 29 | """ 30 | Decode binary mask M encoded via run-length encoding. 31 | :param R (object RLE) : run-length encoding of binary mask 32 | :return: M (bool 2D array) : decoded binary mask 33 | """ 34 | N = len(R['counts']) 35 | M = np.zeros( (R['size'][0]*R['size'][1], )) 36 | n = 0 37 | val = 1 38 | for pos in range(N): 39 | val = not val 40 | for c in range(R['counts'][pos]): 41 | R['counts'][pos] 42 | M[n] = val 43 | n += 1 44 | return M.reshape((R['size']), order='F') 45 | 46 | def mask_coco2voc(coco_masks, im_height, im_width): 47 | voc_masks = np.zeros((len(coco_masks), im_height, im_width)) 48 | for i, ann in enumerate(coco_masks): 49 | if type(ann) == list: 50 | # polygon 51 | m = segToMask(ann, im_height, im_width) 52 | else: 53 | # rle 54 | m = decodeMask(ann) 55 | voc_masks[i,:,:]=m; 56 | return voc_masks 57 | -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | import cv2 11 | from utils.tictoc import tic, toc 12 | from dataset.pycocotools.mask import encode as encodeMask_c 13 | 14 | def encodeMask(M): 15 | """ 16 | Encode binary mask M using run-length encoding. 
-------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | 11 | def segToMask( S, h, w ): 12 | """ 13 | Convert polygon segmentation to binary mask. 14 | :param S (float array) : polygon segmentation mask 15 | :param h (int) : target mask height 16 | :param w (int) : target mask width 17 | :return: M (bool 2D array) : binary mask 18 | """ 19 | M = np.zeros((h,w), dtype=np.bool) 20 | for s in S: 21 | N = len(s) 22 | rr, cc = polygon(np.array(s[1:N:2]).clip(max=h-1), \ 23 | np.array(s[0:N:2]).clip(max=w-1)) # (y, x) 24 | M[rr, cc] = 1 25 | return M 26 | 27 | 28 | def decodeMask(R): 29 | """ 30 | Decode binary mask M encoded via run-length encoding. 31 | :param R (object RLE) : run-length encoding of binary mask 32 | :return: M (bool 2D array) : decoded binary mask 33 | """ 34 | N = len(R['counts']) 35 | M = np.zeros( (R['size'][0]*R['size'][1], )) 36 | n = 0 37 | val = 1 38 | for pos in range(N): 39 | val = not val 40 | for c in range(R['counts'][pos]): 41 | 42 | M[n] = val 43 | n += 1 44 | return M.reshape((R['size']), order='F') 45 | 46 | def mask_coco2voc(coco_masks, im_height, im_width): 47 | voc_masks = np.zeros((len(coco_masks), im_height, im_width)) 48 | for i, ann in enumerate(coco_masks): 49 | if type(ann) == list: 50 | # polygon 51 | m = segToMask(ann, im_height, im_width) 52 | else: 53 | # rle 54 | m = decodeMask(ann) 55 | voc_masks[i, :, :] = m 56 | return voc_masks 57 | -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li 6 | # -------------------------------------------------------- 7 | 8 | from skimage.draw import polygon 9 | import numpy as np 10 | import cv2 11 | from utils.tictoc import tic, toc 12 | from dataset.pycocotools.mask import encode as encodeMask_c 13 | 14 | def encodeMask(M): 15 | """ 16 | Encode binary mask M using run-length encoding. 17 | :param M (bool 2D array) : binary mask to encode 18 | :return: R (object RLE) : run-length encoding of binary mask 19 | """ 20 | [h, w] = M.shape 21 | M = M.flatten(order='F') 22 | N = len(M) 23 | counts_list = [] 24 | pos = 0 25 | # counts 26 | counts_list.append(1) 27 | diffs = np.logical_xor(M[0:N - 1], M[1:N]) 28 | for diff in diffs: 29 | if diff: 30 | pos += 1 31 | counts_list.append(1) 32 | else: 33 | counts_list[pos] += 1 34 | # if array starts from 1. start with 0 counts for 0 35 | if M[0] == 1: 36 | counts_list = [0] + counts_list 37 | return {'size': [h, w], 38 | 'counts': counts_list, 39 | } 40 | 41 | def mask_voc2coco(voc_masks, voc_boxes, im_height, im_width, binary_thresh = 0.4): 42 | num_pred = len(voc_masks) 43 | assert(num_pred==voc_boxes.shape[0]) 44 | mask_img = np.zeros((im_height, im_width, num_pred), dtype=np.uint8, order='F') 45 | for i in xrange(num_pred): 46 | pred_box = np.round(voc_boxes[i, :4]).astype(int) 47 | pred_mask = voc_masks[i] 48 | pred_mask = cv2.resize(pred_mask.astype(np.float32), (pred_box[2] - pred_box[0] + 1, pred_box[3] - pred_box[1] + 1)) 49 | mask_img[pred_box[1]:pred_box[3]+1, pred_box[0]:pred_box[2]+1, i] = pred_mask >= binary_thresh 50 | coco_mask = encodeMask_c(mask_img) 51 | return coco_mask 52 | -------------------------------------------------------------------------------- /lib/utils/network_visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2016 by Contributors 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2017 ShanghaiTech PLUS Group 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Zheng Zhang 7 | # Written by Songyang Zhang 8 | # E-mail: sy.zhangbuaa#gmail.com 9 | # -------------------------------------------------------- 10 | 11 | import mxnet as mx 12 | from deeplab import _init_paths 13 | # from deeplab.symbols.resnet_v1_101_deeplab_dcn_duc import resnet_v1_101_deeplab_dcn_duc 14 | # from deeplab.symbols.duc_hdc_symbol.network_duc_hdc import get_symbol_duc_hdc 15 | from deeplab.symbols.densenet_bc_deeplab_base import densenet_bc_deeplab_base 16 | 17 | def plot_network(symbol, input_data_shape): 18 | t = mx.viz.plot_network(symbol, shape={'data' : input_data_shape}) 19 | t.render() 20 | 21 | 22 | if __name__ == '__main__': 23 | # For resnet-dcn 24 | # resnet_dcn = resnet_v1_101_deeplab_dcn_duc() 25 | # symbol = resnet_dcn.get_train_duc_symbol(19) 26 | # input_data_shape = (1, 3, 1024, 2048) 27 | # plot_network(symbol, input_data_shape) 28 | 29 | # symbol_duc = get_symbol_duc_hdc(19, 16) 30 | # plot_network(symbol_duc, input_data_shape) 31 | 32 | # For DenseNet 33 | # depth = 121 34 | 35 | # if depth == 121: 36 | # units = [6, 12, 24, 16] 37 | # elif depth == 169: 38 | # units = [6, 12, 32, 32] 39 | # elif depth == 201: 40 | # units = [6, 12, 48, 32] 41 | # elif depth == 161: 42 | # units = [6, 12, 36, 24] 43 | # else: 44 | # raise ValueError("no experiments done on depth {}, you can do it yourself".format(depth)) 45 | 46 | # reduction = 0.5 47 | 48 | # symbol_densenet = DenseNet(units=units, num_stage=4, growth_rate=48 if depth==161 else 32, 49 | # num_class=1000, data_type='imagenet', reduction=reduction, drop_out=0, bottle_neck=True, 50 | # bn_mom=0.9, workspace=512) 51 | densenets = densenet_bc_deeplab_base() 52 | input_data_shape = (1, 3, 1024, 2048) 53 | symbol_densenet = densenets.get_train_symbol(19) 54 | plot_network(symbol_densenet,
input_data_shape) 55 | -------------------------------------------------------------------------------- /lib/utils/roidb.py: -------------------------------------------------------------------------------- 1 | """ 2 | roidb 3 | basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 4 | extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] 5 | """ 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | from bbox.bbox_regression import compute_bbox_regression_targets 11 | 12 | 13 | def prepare_roidb(imdb, roidb, cfg): 14 | """ 15 | add image path, max_classes, max_overlaps to roidb 16 | :param imdb: image database, provide path 17 | :param roidb: roidb 18 | :return: None 19 | """ 20 | print 'prepare roidb' 21 | for i in range(len(roidb)): # image_index 22 | roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) 23 | if cfg.TRAIN.ASPECT_GROUPING: 24 | size = cv2.imread(roidb[i]['image']).shape 25 | roidb[i]['height'] = size[0] 26 | roidb[i]['width'] = size[1] 27 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 28 | max_overlaps = gt_overlaps.max(axis=1) 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | roidb[i]['max_overlaps'] = max_overlaps 31 | roidb[i]['max_classes'] = max_classes 32 | 33 | # background roi => background class 34 | zero_indexes = np.where(max_overlaps == 0)[0] 35 | assert all(max_classes[zero_indexes] == 0) 36 | # foreground roi => foreground class 37 | nonzero_indexes = np.where(max_overlaps > 0)[0] 38 | assert all(max_classes[nonzero_indexes] != 0) 39 | -------------------------------------------------------------------------------- /lib/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 
14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /lib/utils/show_boxes.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li, Haocheng Zhang 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | from random import random as rand 10 | def show_boxes(im, dets, classes, scale = 1.0): 11 | plt.cla() 12 | plt.axis("off") 13 | plt.imshow(im) 14 | for cls_idx, cls_name in enumerate(classes): 15 | cls_dets = dets[cls_idx] 16 | for det in cls_dets: 17 | bbox = det[:4] * scale 18 | color = (rand(), rand(), rand()) 19 | rect = plt.Rectangle((bbox[0], bbox[1]), 20 | bbox[2] - bbox[0], 21 | bbox[3] - bbox[1], fill=False, 22 | edgecolor=color, linewidth=2.5) 23 | plt.gca().add_patch(rect) 24 | 25 | if cls_dets.shape[1] == 5: 26 | score = det[-1] 27 | plt.gca().text(bbox[0], bbox[1], 28 | '{:s} {:.3f}'.format(cls_name, score), 29 | bbox=dict(facecolor=color, alpha=0.5), fontsize=9, color='white') 30 | plt.show() 31 | return im 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/show_masks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import random 4 | import cv2 5 | 6 | def show_masks(im, dets, msks, show = True, thresh = 1e-3, scale = 1.0): 7 | plt.cla() 8 | plt.imshow(im) 9 | for det, msk in zip(dets, msks): 10 | color = (random.random(), random.random(), random.random()) # generate a random color 11 | bbox = det[:4] * scale 12 | cod = np.zeros(4).astype(int) 13 | cod[0] = int(bbox[0]) 14 | cod[1] = int(bbox[1]) 15 | cod[2] = int(bbox[2]) 16 | cod[3] = int(bbox[3]) 17 | if im[cod[0]:cod[2], cod[1]:cod[3], 0].size > 0: 18 | msk = cv2.resize(msk, im[cod[1]:cod[3], cod[0]:cod[2], 0].T.shape) 19 | bimsk = msk > thresh 20 | bimsk = bimsk.astype(int) 21 | bimsk = np.repeat(bimsk[:, :, np.newaxis], 3, axis=2) 22 | mskd = im[cod[1]:cod[3], cod[0]:cod[2], :] * bimsk 23 | clmsk = np.ones(bimsk.shape) * bimsk 24 | clmsk[:, :, 0] = clmsk[:, :, 0] * color[0] * 256; 25 | clmsk[:, :, 1] = clmsk[:, :, 1] * color[1] * 256; 26 | clmsk[:, :, 2] = clmsk[:, :, 2] * color[2] * 256; 27 | im[cod[1]:cod[3], cod[0]:cod[2], :] = im[cod[1]:cod[3], cod[0]:cod[2], :] + 0.8 * clmsk - 0.8 * mskd 28 | plt.imshow(im) 29 | if(show): 30 | plt.show() 31 | return im 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/show_offset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | def show_boxes_simple(bbox, color='r', lw=2): 12 | rect = 
plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], 13 | bbox[3] - bbox[1], fill=False, edgecolor=color, linewidth=lw) 14 | plt.gca().add_patch(rect) 15 | 16 | def kernel_inv_map(vis_attr, target_point, map_h, map_w): 17 | pos_shift = [vis_attr['dilation'] * 0 - vis_attr['pad'], 18 | vis_attr['dilation'] * 1 - vis_attr['pad'], 19 | vis_attr['dilation'] * 2 - vis_attr['pad']] 20 | source_point = [] 21 | for idx in range(vis_attr['filter_size']**2): 22 | cur_source_point = np.array([target_point[0] + pos_shift[idx / 3], 23 | target_point[1] + pos_shift[idx % 3]]) 24 | if cur_source_point[0] < 0 or cur_source_point[1] < 0 \ 25 | or cur_source_point[0] > map_h - 1 or cur_source_point[1] > map_w - 1: 26 | continue 27 | source_point.append(cur_source_point.astype('f')) 28 | return source_point 29 | 30 | def offset_inv_map(source_points, offset): 31 | for idx, _ in enumerate(source_points): 32 | source_points[idx][0] += offset[2*idx] 33 | source_points[idx][1] += offset[2*idx + 1] 34 | return source_points 35 | 36 | def get_bottom_position(vis_attr, top_points, all_offset): 37 | map_h = all_offset[0].shape[2] 38 | map_w = all_offset[0].shape[3] 39 | 40 | for level in range(vis_attr['plot_level']): 41 | source_points = [] 42 | for idx, cur_top_point in enumerate(top_points): 43 | cur_top_point = np.round(cur_top_point) 44 | if cur_top_point[0] < 0 or cur_top_point[1] < 0 \ 45 | or cur_top_point[0] > map_h-1 or cur_top_point[1] > map_w-1: 46 | continue 47 | cur_source_point = kernel_inv_map(vis_attr, cur_top_point, map_h, map_w) 48 | cur_offset = np.squeeze(all_offset[level][:, :, int(cur_top_point[0]), int(cur_top_point[1])]) 49 | cur_source_point = offset_inv_map(cur_source_point, cur_offset) 50 | source_points = source_points + cur_source_point 51 | top_points = source_points 52 | return source_points 53 | 54 | def plot_according_to_point(vis_attr, im, source_points, map_h, map_w, color=[255,0,0]): 55 | plot_area = vis_attr['plot_area'] 56 | for idx, cur_source_point in enumerate(source_points): 57 | y = np.round((cur_source_point[0] + 0.5) * im.shape[0] / map_h).astype('i') 58 | x = np.round((cur_source_point[1] + 0.5) * im.shape[1] / map_w).astype('i') 59 | 60 | if x < 0 or y < 0 or x > im.shape[1]-1 or y > im.shape[0]-1: 61 | continue 62 | y = min(y, im.shape[0] - vis_attr['plot_area'] - 1) 63 | x = min(x, im.shape[1] - vis_attr['plot_area'] - 1) 64 | y = max(y, vis_attr['plot_area']) 65 | x = max(x, vis_attr['plot_area']) 66 | im[y-plot_area:y+plot_area+1, x-plot_area:x+plot_area+1, :] = np.tile( 67 | np.reshape(color, (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1) 68 | ) 69 | return im 70 | 71 | 72 | 73 | def show_dpsroi_offset(im, boxes, offset, classes, trans_std=0.1): 74 | plt.cla() 75 | for idx, bbox in enumerate(boxes): 76 | plt.figure(idx+1) 77 | plt.axis("off") 78 | plt.imshow(im) 79 | 80 | offset_w = np.squeeze(offset[idx, classes[idx]*2, :, :]) * trans_std 81 | offset_h = np.squeeze(offset[idx, classes[idx]*2+1, :, :]) * trans_std 82 | x1 = int(bbox[0]) 83 | y1 = int(bbox[1]) 84 | x2 = int(bbox[2]) 85 | y2 = int(bbox[3]) 86 | roi_width = x2-x1+1 87 | roi_height = y2-y1+1 88 | part_size = offset_w.shape[0] 89 | bin_size_w = roi_width / part_size 90 | bin_size_h = roi_height / part_size 91 | show_boxes_simple(bbox, color='b') 92 | for ih in range(part_size): 93 | for iw in range(part_size): 94 | sub_box = np.array([x1+iw*bin_size_w, y1+ih*bin_size_h, 95 | x1+(iw+1)*bin_size_w, y1+(ih+1)*bin_size_h]) 96 | sub_offset = offset_h[ih, iw] * np.array([0, 1, 0, 1]) * roi_height \ 97 | +
offset_w[ih, iw] * np.array([1, 0, 1, 0]) * roi_width 98 | sub_box = sub_box + sub_offset 99 | show_boxes_simple(sub_box) 100 | plt.show() 101 | 102 | def show_dconv_offset(im, all_offset, step=[2, 2], filter_size=3, 103 | dilation=2, pad=2, plot_area=2, plot_level=3): 104 | vis_attr = {'filter_size': filter_size, 'dilation': dilation, 'pad': pad, 105 | 'plot_area': plot_area, 'plot_level': plot_level} 106 | 107 | map_h = all_offset[0].shape[2] 108 | map_w = all_offset[0].shape[3] 109 | 110 | step_h = step[0] 111 | step_w = step[1] 112 | start_h = int(np.round(step_h / 2)) 113 | start_w = int(np.round(step_w / 2)) 114 | 115 | plt.figure() 116 | for im_h in range(start_h, map_h, step_h): 117 | for im_w in range(start_w, map_w, step_w): 118 | target_point = np.array([im_h, im_w]) 119 | source_y = int(np.round(target_point[0] * im.shape[0] / map_h)) 120 | source_x = int(np.round(target_point[1] * im.shape[1] / map_w)) 121 | if source_y < plot_area or source_x < plot_area \ 122 | or source_y >= im.shape[0] - plot_area or source_x >= im.shape[1] - plot_area: 123 | continue 124 | 125 | cur_im = np.copy(im) 126 | source_points = get_bottom_position(vis_attr, [target_point], all_offset) 127 | cur_im = plot_according_to_point(vis_attr, cur_im, source_points, map_h, map_w) 128 | cur_im[source_y-plot_area:source_y+plot_area+1, source_x-plot_area:source_x+plot_area+1, :] = \ 129 | np.tile(np.reshape([0, 255, 0], (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1)) 130 | 131 | 132 | plt.axis("off") 133 | plt.imshow(cur_im) 134 | plt.show(block=False) 135 | plt.pause(0.01) 136 | plt.clf() 137 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | class Symbol: 10 | def __init__(self): 11 | self.arg_shape_dict = None 12 | self.out_shape_dict = None 13 | self.aux_shape_dict = None 14 | self.sym = None 15 | 16 | @property 17 | def symbol(self): 18 | return self.sym 19 | 20 | def get_symbol(self, cfg, is_train=True): 21 | """ 22 | return a generated symbol; it also needs to be assigned to self.sym 23 | """ 24 | raise NotImplementedError() 25 | 26 | def init_weights(self, cfg, arg_params, aux_params): 27 | raise NotImplementedError() 28 | 29 | def get_msra_std(self, shape): 30 | fan_in = float(shape[1]) 31 | if len(shape) > 2: 32 | fan_in *= np.prod(shape[2:]) 33 | print(np.sqrt(2 / fan_in)) 34 | return np.sqrt(2 / fan_in) 35 | 36 | def infer_shape(self, data_shape_dict): 37 | # infer shape 38 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 39 | self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 40 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 41 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 42 | 43 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 44 | for k in self.sym.list_arguments(): 45 | if k in data_shape_dict or (False if is_train else 'label' in k): 46 | continue 47 | assert k in arg_params, k + ' not initialized' 48 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 49 | 'shape inconsistent for ' + k + ' inferred ' +
str(self.arg_shape_dict[k]) + ' provided ' + str( 50 | arg_params[k].shape) 51 | for k in self.sym.list_auxiliary_states(): 52 | assert k in aux_params, k + ' not initialized' 53 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 54 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 55 | aux_params[k].shape) 56 | -------------------------------------------------------------------------------- /lib/utils/tictoc.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def tic(): 4 | import time 5 | global startTime_for_tictoc 6 | startTime_for_tictoc = time.time() 7 | return startTime_for_tictoc 8 | 9 | def toc(): 10 | if 'startTime_for_tictoc' in globals(): 11 | endTime = time.time() 12 | return endTime - startTime_for_tictoc 13 | else: 14 | return None -------------------------------------------------------------------------------- /model/pretrained_model/resnet_v1_101-0000.params: -------------------------------------------------------------------------------- 1 | /home/PublicModel/mxnet/pretrained_model/resnet_v1_101-0000.params -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nose 2 | numpy==1.14.0 3 | nose-timer 4 | requests>=2.20.0 5 | Pillow 6 | easydict 7 | pyyaml 8 | sacred 9 | visdom 10 | Cython 11 | matplotlib 12 | scikit-image 13 | tqdm 14 | mxnet-cu90 15 | opencv-python==3.4.1.15 16 | --------------------------------------------------------------------------------
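Note: the pins describe a Python 2 / CUDA 9.0 environment (the code relies on xrange and print statements, and mxnet-cu90 targets CUDA 9.0). On a matching machine the usual sequence is `pip install -r requirements.txt` followed by running init.sh, which presumably builds the Cython/CUDA extensions under lib/ before training.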