├── __init__.py
├── config
    ├── __init__.py
    ├── config.py
    └── utils.py
├── detect
    ├── __init__.py
    └── detector.py
├── symbol
    ├── __init__.py
    ├── README.md
    ├── vgg16_reduced.py
    ├── mobilenet.py
    ├── symbol_builder.py
    ├── resnet.py
    ├── densenet.py
    ├── symbol_factory.py
    ├── legacy_vgg16_ssd_300.py
    ├── inceptionv3.py
    └── legacy_vgg16_ssd_512.py
├── tools
    ├── __init__.py
    ├── caffe_converter
    │   ├── caffe_parse
    │   │   ├── __init__.py
    │   │   └── parse_from_protobuf.py
    │   ├── make_win32.bat
    │   ├── Makefile
    │   ├── run.sh
    │   ├── mean_image.py
    │   ├── README.md
    │   └── convert_model.py
    ├── prepare_pascal.sh
    ├── prepare_coco.sh
    ├── find_mxnet.py
    ├── visualize_net.py
    ├── image_processing.py
    ├── prepare_dataset.py
    └── rand_sampler.py
├── train
    ├── __init__.py
    └── metric.py
├── dataset
    ├── __init__.py
    ├── pycocotools
    │   ├── __init__.py
    │   └── README.md
    ├── names
    │   ├── pascal_voc.names
    │   └── mscoco.names
    ├── testdb.py
    ├── imdb.py
    ├── concat_db.py
    ├── mscoco.py
    ├── yolo_format.py
    └── iterator.py
├── evaluate
    ├── __init__.py
    ├── evaluate_net.py
    ├── eval_voc.py
    └── custom_callbacks.py
├── scripts
    ├── __init__.py
    ├── train_script.sh
    ├── run_ssd_docker.sh
    └── run_tensorboard.sh
├── data
    └── demo
    │   ├── dog.jpg
    │   ├── eagle.jpg
    │   ├── 000001.jpg
    │   ├── 000002.jpg
    │   ├── 000003.jpg
    │   ├── 000004.jpg
    │   ├── 000006.jpg
    │   ├── 000008.jpg
    │   ├── 000010.jpg
    │   ├── 000022.jpg
    │   ├── horses.jpg
    │   ├── person.jpg
    │   └── street.jpg
├── .gitmodules
├── model
    └── README.md
├── docker
    ├── mxnet_0.12
    │   └── Dockerfile
    ├── mxnet_0_11
    │   └── Dockerfile
    ├── cudnn5.1
    │   └── Dockerfile
    └── cudnn6.0
    │   └── Dockerfile
├── LICENSE
├── deploy.py
├── .gitignore
├── evaluate.py
├── demo.py
└── train.py


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/detect/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/symbol/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/train/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/caffe_parse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/dataset/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/data/demo/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/dog.jpg


--------------------------------------------------------------------------------
/data/demo/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/eagle.jpg


--------------------------------------------------------------------------------
/data/demo/000001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000001.jpg


--------------------------------------------------------------------------------
/data/demo/000002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000002.jpg


--------------------------------------------------------------------------------
/data/demo/000003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000003.jpg


--------------------------------------------------------------------------------
/data/demo/000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000004.jpg


--------------------------------------------------------------------------------
/data/demo/000006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000006.jpg


--------------------------------------------------------------------------------
/data/demo/000008.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000008.jpg


--------------------------------------------------------------------------------
/data/demo/000010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000010.jpg


--------------------------------------------------------------------------------
/data/demo/000022.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000022.jpg


--------------------------------------------------------------------------------
/data/demo/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/horses.jpg


--------------------------------------------------------------------------------
/data/demo/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/person.jpg


--------------------------------------------------------------------------------
/data/demo/street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/street.jpg


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "mxnet"]
2 | 	path = mxnet
3 | 	url = https://github.com/zhreshold/mxnet.git
4 | 


--------------------------------------------------------------------------------
/model/README.md:
--------------------------------------------------------------------------------
1 | #### This is the default directory to store all the models, including `*.params` and `*.json`
2 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/make_win32.bat:
--------------------------------------------------------------------------------
1 | @protoc --python_out=./ ./caffe_parse/caffe.proto
2 | @echo done.
3 | @pause
4 | 


--------------------------------------------------------------------------------
/dataset/pycocotools/README.md:
--------------------------------------------------------------------------------
1 | This is a modified version of https://github.com/pdollar/coco python API.
2 | No `make` is required, but this will not support mask functions.
3 | 


--------------------------------------------------------------------------------
/dataset/names/pascal_voc.names:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 | 


--------------------------------------------------------------------------------
/docker/mxnet_0.12/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM    mxnet/python:gpu_0.12.0
 2 | 
 3 | RUN     apt-get update && apt-get install -y \
 4 |         nano \
 5 |         wget \
 6 |         graphviz \
 7 |         python-tk
 8 | 
 9 | 
10 | RUN     pip install ipython jupyter matplotlib scipy graphviz tensorboard future
11 | 
12 | WORKDIR /mxnet/example/ssd
13 | 


--------------------------------------------------------------------------------
/tools/prepare_pascal.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Generate data/train.lst and data/val.lst from Pascal VOC via prepare_dataset.py.
3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
4 | # $DIR is quoted so a checkout path containing spaces is not word-split
5 | python "$DIR/prepare_dataset.py" --dataset pascal --year 2007,2012 --set trainval --target "$DIR/../data/train.lst"
6 | python "$DIR/prepare_dataset.py" --dataset pascal --year 2007 --set test --target "$DIR/../data/val.lst" --shuffle False
7 | 


--------------------------------------------------------------------------------
/docker/mxnet_0_11/Dockerfile:
--------------------------------------------------------------------------------
 1 | 
 2 | FROM    mxnet/python:gpu_0.11.0
 3 | 
 4 | RUN     apt-get update && apt-get install -y \
 5 |         nano \
 6 |         wget \
 7 |         graphviz \
 8 |         python-tk
 9 | 
10 | 
11 | RUN     pip install ipython jupyter matplotlib scipy graphviz tensorboard future
12 | 
13 | WORKDIR /mxnet/example/ssd
14 | 


--------------------------------------------------------------------------------
/tools/prepare_coco.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Generate data/train.lst and data/val.lst from the COCO data under data/coco via prepare_dataset.py.
3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
4 | # $DIR is quoted so a checkout path containing spaces is not word-split
5 | python "$DIR/prepare_dataset.py" --dataset coco --set train2014,valminusminival2014 --target "$DIR/../data/train.lst"  --root "$DIR/../data/coco"
6 | python "$DIR/prepare_dataset.py" --dataset coco --set minival2014 --target "$DIR/../data/val.lst" --shuffle False --root "$DIR/../data/coco"
7 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/Makefile:
--------------------------------------------------------------------------------
 1 | # find protoc
 2 | ifndef PROTOC
 3 | DEPS_PROTOC=../../deps/bin/protoc
 4 | ifneq ("$(wildcard $(DEPS_PROTOC))","")
 5 | PROTOC = $(DEPS_PROTOC)
 6 | else
 7 | PROTOC = protoc
 8 | endif
 9 | endif
10 | 
11 | all: caffe_parse/caffe_pb2.py
12 | 
13 | clean:
14 | 	rm caffe_parse/caffe_pb2.py*
15 | 
16 | caffe_parse/caffe_pb2.py:
17 | 	$(PROTOC) --python_out=./ ./caffe_parse/caffe.proto
18 | 


--------------------------------------------------------------------------------
/tools/find_mxnet.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | try:
 4 |     if os.environ.get('MXNET_EXAMPLE_SSD_DISABLE_PRE_INSTALLED', 0):
 5 |         raise ImportError
 6 |     import mxnet as mx
 7 |     print("Using mxnet as:")
 8 |     print(mx)
 9 |     print("Warning: using pre-installed version of mxnet may cause unexpected error...")
10 |     print("(export MXNET_EXAMPLE_SSD_DISABLE_PRE_INSTALLED=1) to prevent loading pre-installed mxnet.")
11 | except ImportError:
12 |     import os, sys
13 |     curr_path = os.path.abspath(os.path.dirname(__file__))
14 |     sys.path.insert(0, os.path.join(curr_path, "../mxnet/python"))
15 |     import mxnet as mx
16 | 


--------------------------------------------------------------------------------
/scripts/train_script.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # this is a training script
 4 | # defining global parameters
 5 | GPUS='0,1,2,3'
 6 | TRAIN_REC_PATH=./data/train.rec
 7 | VAL_REC_PATH=./data/val.rec
 8 | NETWORK=vgg16_reduced
 9 | BATCH_SIZE=128
10 | DATA_SHAPE=300
11 | PRETRAINED=./model/vgg16_reduced/vgg16_reduced
12 | OPTIMIZER=rmsprop
13 | TENSORBOARD=True
14 | LR_STEPS=20,40,60
15 | 
16 | python ./train.py \
17 |     --train-path ${TRAIN_REC_PATH} \
18 |     --val-path ${VAL_REC_PATH} \
19 |     --network ${NETWORK} \
20 |     --batch-size ${BATCH_SIZE} \
21 |     --data-shape ${DATA_SHAPE} \
22 |     --gpus ${GPUS} \
23 |     --pretrained ${PRETRAINED} \
24 |     --optimizer ${OPTIMIZER} \
25 |     --tensorboard ${TENSORBOARD} \
26 |     --lr-steps ${LR_STEPS} \
27 |     --freeze ''


--------------------------------------------------------------------------------
/tools/caffe_converter/caffe_parse/parse_from_protobuf.py:
--------------------------------------------------------------------------------
 1 | from google.protobuf import text_format
 2 | import numpy as np
 3 | import caffe_parse.caffe_pb2 as caffe_pb2
 4 | 
 5 | 
 6 | def parse_caffemodel(file_path):
 7 |     """
 8 |     parses the trained .caffemodel file
 9 | 
10 |     filepath: /path/to/trained-model.caffemodel
11 | 
12 |     returns: layers
13 |     """
14 |     f = open(file_path, 'rb')
15 |     contents = f.read()
16 | 
17 |     net_param = caffe_pb2.NetParameter()
18 |     net_param.ParseFromString(contents)
19 | 
20 |     layers = find_layers(net_param)
21 |     return layers
22 | 
23 | 
24 | def find_layers(net_param):
25 |     if len(net_param.layers) > 0:
26 |         return net_param.layers
27 |     elif len(net_param.layer) > 0:
28 |         return net_param.layer
29 |     else:
30 |         raise Exception("Couldn't find layers")
31 | 
32 | 
33 | def main():
34 |     param_dict = parse_caffemodel('xxx.caffemodel')
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     main()
39 | 


--------------------------------------------------------------------------------
/dataset/names/mscoco.names:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Joshua Z. Zhang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/scripts/run_ssd_docker.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | nvidia-docker run -it --rm \
 4 | -e MXNET_CUDNN_AUTOTUNE_DEFAULT=0 \
 5 | -v /home/oper/Datasets:/mxnet/example/ssd/data \
 6 | -v /home/oper/david/mxnet-ssd/model:/mxnet/example/ssd/model \
 7 | -v /home/oper/david/mxnet-ssd/config:/mxnet/example/ssd/config \
 8 | -v /home/oper/david/mxnet-ssd/output:/mxnet/example/ssd/output \
 9 | -v /home/oper/david/mxnet-ssd/dataset:/mxnet/example/ssd/dataset \
10 | -v /home/oper/david/mxnet-ssd/train:/mxnet/example/ssd/train \
11 | -v /home/oper/david/mxnet-ssd/tools:/mxnet/example/ssd/tools \
12 | -v /home/oper/david/mxnet-ssd/symbol:/mxnet/example/ssd/symbol \
13 | -v /home/oper/david/mxnet-ssd/detect:/mxnet/example/ssd/detect \
14 | -v /home/oper/david/mxnet-ssd/evaluate:/mxnet/example/ssd/evaluate \
15 | -v /home/oper/david/mxnet-ssd/scripts:/mxnet/example/ssd/scripts \
16 | -v /home/oper/david/mxnet-ssd/deploy.py:/mxnet/example/ssd/deploy.py \
17 | -v /home/oper/david/mxnet-ssd/evaluate.py:/mxnet/example/ssd/evaluate.py \
18 | -v /home/oper/david/mxnet-ssd/train.py:/mxnet/example/ssd/train.py \
19 | -v /home/oper/david/mxnet-ssd/demo.py:/mxnet/example/ssd/demo.py \
20 | mxnet/ssd:gpu_0.12.0_cuda9
21 | 


--------------------------------------------------------------------------------
/scripts/run_tensorboard.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | nvidia-docker run -it --rm -p 0.0.0.0:6006:6006 \
 4 | -e MXNET_CUDNN_AUTOTUNE_DEFAULT=0 \
 5 | -v /home/oper/Datasets:/mxnet/example/ssd/data \
 6 | -v /home/oper/david/mxnet-ssd/model:/mxnet/example/ssd/model \
 7 | -v /home/oper/david/mxnet-ssd/config:/mxnet/example/ssd/config \
 8 | -v /home/oper/david/mxnet-ssd/output:/mxnet/example/ssd/output \
 9 | -v /home/oper/david/mxnet-ssd/dataset:/mxnet/example/ssd/dataset \
10 | -v /home/oper/david/mxnet-ssd/train:/mxnet/example/ssd/train \
11 | -v /home/oper/david/mxnet-ssd/tools:/mxnet/example/ssd/tools \
12 | -v /home/oper/david/mxnet-ssd/symbol:/mxnet/example/ssd/symbol \
13 | -v /home/oper/david/mxnet-ssd/detect:/mxnet/example/ssd/detect \
14 | -v /home/oper/david/mxnet-ssd/evaluate:/mxnet/example/ssd/evaluate \
15 | -v /home/oper/david/mxnet-ssd/scripts:/mxnet/example/ssd/scripts \
16 | -v /home/oper/david/mxnet-ssd/deploy.py:/mxnet/example/ssd/deploy.py \
17 | -v /home/oper/david/mxnet-ssd/evaluate.py:/mxnet/example/ssd/evaluate.py \
18 | -v /home/oper/david/mxnet-ssd/train.py:/mxnet/example/ssd/train.py \
19 | -v /home/oper/david/mxnet-ssd/demo.py:/mxnet/example/ssd/demo.py \
20 | mxnet/ssd:gpu_0.12.0_cuda9
21 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Fetch the VGG16/VGG19 Caffe prototxt and caffemodel when they are not already
 3 | # in the current directory, then convert them with convert_model.py.
 4 | if [[ $# -ne 1 ]]; then
 5 |     echo "usage: $0 model_name"
 6 |     echo "   model_name: [vgg16|vgg19], ..."
 7 |     exit -1
 8 | fi
 9 | 
10 | # directory of this script, quoted so a path containing spaces is not split
11 | script_dir="$(dirname "$0")"
12 | 
13 | if [[ $1 == "vgg19" ]]; then
14 |     if [[ ! -f VGG_ILSVRC_19_layers_deploy.prototxt ]]; then
15 |         wget -c https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt
16 |     fi
17 | 
18 |     if [[ ! -f VGG_ILSVRC_19_layers.caffemodel ]]; then
19 |         wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel
20 |     fi
21 | 
22 |     echo "converting"
23 |     python "$script_dir/convert_model.py" VGG_ILSVRC_19_layers_deploy.prototxt VGG_ILSVRC_19_layers.caffemodel vgg19
24 | elif [[ $1 == "vgg16" ]]; then
25 |     if [[ ! -f VGG_ILSVRC_16_layers_deploy.prototxt ]]; then
26 |         wget -c https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/c3ba00e272d9f48594acef1f67e5fd12aff7a806/VGG_ILSVRC_16_layers_deploy.prototxt
27 |     fi
28 | 
29 |     if [[ ! -f VGG_ILSVRC_16_layers.caffemodel ]]; then
30 |         wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel
31 |     fi
32 | 
33 |     echo "converting"
34 |     python "$script_dir/convert_model.py" VGG_ILSVRC_16_layers_deploy.prototxt VGG_ILSVRC_16_layers.caffemodel vgg16
35 | else
36 |     echo "unsupported model: $1"
37 |     # report the failure to the caller instead of exiting with status 0
38 |     exit 1
39 | fi
40 | 


--------------------------------------------------------------------------------
/docker/cudnn5.1/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Start with cuDNN base image
 2 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
 3 | MAINTAINER DavidSolomon <solomond78@gmail.com>
 4 | 
 5 | # Install git, wget and other dependencies
 6 | RUN apt-get update && apt-get install -y \
 7 |   nano \
 8 |   git \
 9 |   libopenblas-dev \
10 |   libopencv-dev \
11 |   python-dev \
12 |   python-numpy \
13 |   python-setuptools \
14 |   python-opencv \
15 |   python-matplotlib \
16 |   python-tk \
17 |   wget \
18 |   graphviz
19 | 
20 | # Clone the mxnet-ssd repo (with its mxnet submodule) and move into the mxnet directory
21 | RUN cd /root && git clone --recursive https://github.com/zhreshold/mxnet-ssd.git && cd mxnet-ssd/mxnet && \
22 | # Copy config.mk
23 |   cp make/config.mk config.mk && \
24 | # Set OpenBLAS
25 |   sed -i 's/USE_BLAS = atlas/USE_BLAS = openblas/g' config.mk && \
26 | # Set CUDA flag
27 |   sed -i 's/USE_CUDA = 0/USE_CUDA = 1/g' config.mk && \
28 |   sed -i 's/USE_CUDA_PATH = NONE/USE_CUDA_PATH = \/usr\/local\/cuda/g' config.mk && \
29 | # Set cuDNN flag
30 |   sed -i 's/USE_CUDNN = 0/USE_CUDNN = 1/g' config.mk && \
31 | # Make
32 |   make -j $(nproc)
33 | 
34 | # Install Python package
35 | RUN cd /root/mxnet-ssd/mxnet/python && python setup.py install
36 | 
37 | # Add to Python path (single quotes: $PYTHONPATH is expanded when .bashrc runs, not at build time)
38 | RUN echo 'export PYTHONPATH=/root/mxnet-ssd/mxnet/python:$PYTHONPATH' >> /root/.bashrc
39 | 
40 | # Install pip
41 | RUN easy_install -U pip
42 | 
43 | # Install graphviz and jupyter
44 | RUN pip install graphviz jupyter ipython matplotlib tensorboard future scipy
45 | 
46 | # Set ~/mxnet-ssd as working directory
47 | WORKDIR /root/mxnet-ssd
48 | 
49 | # TODO add tensorboard code change to the docker...
50 | # the installation was /usr/local/lib/python2.7/dist-packages/tensorboard/summary:186
51 | 
52 | 


--------------------------------------------------------------------------------
/docker/cudnn6.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Start with cuDNN base image
 2 | FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
 3 | MAINTAINER DavidSolomon <solomond78@gmail.com>
 4 | 
 5 | # Install git, wget and other dependencies
 6 | RUN apt-get update && apt-get install -y \
 7 |   nano \
 8 |   git \
 9 |   libopenblas-dev \
10 |   libopencv-dev \
11 |   python-dev \
12 |   python-numpy \
13 |   python-setuptools \
14 |   python-opencv \
15 |   python-matplotlib \
16 |   python-tk \
17 |   wget \
18 |   graphviz
19 | 
20 | # Clone the mxnet-ssd repo (with its mxnet submodule) and move into the mxnet directory
21 | RUN cd /root && git clone --recursive https://github.com/zhreshold/mxnet-ssd.git && cd mxnet-ssd/mxnet && \
22 | # Copy config.mk
23 |   cp make/config.mk config.mk && \
24 | # Set OpenBLAS
25 |   sed -i 's/USE_BLAS = atlas/USE_BLAS = openblas/g' config.mk && \
26 | # Set CUDA flag
27 |   sed -i 's/USE_CUDA = 0/USE_CUDA = 1/g' config.mk && \
28 |   sed -i 's/USE_CUDA_PATH = NONE/USE_CUDA_PATH = \/usr\/local\/cuda/g' config.mk && \
29 | # Set cuDNN flag
30 |   sed -i 's/USE_CUDNN = 0/USE_CUDNN = 1/g' config.mk && \
31 | # Make
32 |   make -j $(nproc)
33 | 
34 | # Install Python package
35 | RUN cd /root/mxnet-ssd/mxnet/python && python setup.py install
36 | 
37 | # Add to Python path (single quotes: $PYTHONPATH is expanded when .bashrc runs, not at build time)
38 | RUN echo 'export PYTHONPATH=/root/mxnet-ssd/mxnet/python:$PYTHONPATH' >> /root/.bashrc
39 | 
40 | # Install pip
41 | RUN easy_install -U pip
42 | 
43 | # Install graphviz and jupyter
44 | RUN pip install graphviz jupyter ipython matplotlib tensorboard future scipy
45 | 
46 | # Set ~/mxnet-ssd as working directory
47 | WORKDIR /root/mxnet-ssd
48 | 
49 | # TODO add tensorboard code change to the docker...
50 | # the installation was /usr/local/lib/python2.7/dist-packages/tensorboard/summary:186
51 | 
52 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/mean_image.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import mxnet as mx
 3 | import numpy as np
 4 | import argparse
 5 | 
 6 | caffe_flag = True
 7 | try:
 8 |     import caffe
 9 |     from caffe.proto import caffe_pb2
10 | except ImportError:
11 |     caffe_flag = False
12 |     from .caffe_parse import caffe_pb2
13 | 
14 | 
15 | def protoBlobFileToND(proto_file):
16 |     data = ''
17 |     file = open(proto_file, "r")
18 |     if not file:
19 |         raise Exception("ERROR (" + proto_file + ")!")
20 |     data = file.read()
21 |     file.close()
22 | 
23 |     if caffe_flag:
24 |         mean_blob = caffe.proto.caffe_pb2.BlobProto()
25 |     else:
26 |         mean_blob = caffe_parse.caffe_pb2.BlobProto()
27 | 
28 |     mean_blob.ParseFromString(data)
29 |     img_mean_np = np.array(mean_blob.data)
30 |     img_mean_np = img_mean_np.reshape(
31 |         mean_blob.channels, mean_blob.height, mean_blob.width
32 |     )
33 |     # swap channels from Caffe BGR to RGB
34 |     img_mean_np2 = img_mean_np
35 |     img_mean_np[0] = img_mean_np2[2]
36 |     img_mean_np[2] = img_mean_np2[0]
37 |     return mx.nd.array(img_mean_np)
38 | 
39 | 
40 | def main():
41 |     parser = argparse.ArgumentParser(description='Caffe prototxt to mxnet model parameter converter.\
42 |                     Note that only basic functions are implemented. You are welcomed to contribute to this file.')
43 |     parser.add_argument('mean_image_proto', help='The protobuf file in Caffe format')
44 |     parser.add_argument('save_name', help='The name of the output file prefix')
45 |     args = parser.parse_args()
46 |     nd = protoBlobFileToND(args.mean_image_proto)
47 |     mx.nd.save(args.save_name + ".nd", {"mean_image": nd})
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     main()
52 | 


--------------------------------------------------------------------------------
/dataset/testdb.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import os
 3 | from .imdb import Imdb
 4 | 
 5 | 
 6 | class TestDB(Imdb):
 7 |     """
 8 |     A simple wrapper class for converting list of image to Imdb during testing
 9 | 
10 |     Parameters:
11 |     ----------
12 |     images : str or list of str
13 |         image path or list of images, if directory and extension not
14 |         specified, root_dir and extension are required
15 |     root_dir : str or None
16 |         directory of input images, optional if image path already
17 |         has full directory information
18 |     extension : str or None
19 |         image extension, eg. ".jpg", optional
20 |     """
21 |     def __init__(self, images, root_dir=None, extension=None):
22 |         if not isinstance(images, list):
23 |             images = [images]
24 |         num_images = len(images)
25 |         super(TestDB, self).__init__("test" + str(num_images))
26 |         self.image_set_index = images
27 |         self.num_images = num_images
28 |         self.root_dir = root_dir if root_dir else None
29 |         self.extension = extension if extension else None
30 | 
31 | 
32 |     def image_path_from_index(self, index):
33 |         """
34 |         given image index, return full path
35 | 
36 |         Parameters:
37 |         ----------
38 |         index: int
39 |             index of a specific image
40 |         Returns
41 |         ----------
42 |         path of this image
43 |         """
44 |         name = self.image_set_index[index]
45 |         if self.extension:
46 |             name += self.extension
47 |         if self.root_dir:
48 |             name = os.path.join(self.root_dir, name)
49 |         assert os.path.exists(name), 'Path does not exist: {}'.format(name)
50 |         return name
51 | 
52 |     def label_from_index(self, index):
53 |         return RuntimeError("Testdb does not support label loading")
54 | 


--------------------------------------------------------------------------------
/deploy.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | import tools.find_mxnet
 4 | import mxnet as mx
 5 | import os
 6 | import importlib
 7 | import sys
 8 | from symbol.symbol_factory import get_symbol
 9 | 
10 | def parse_args():
11 |     parser = argparse.ArgumentParser(description='Convert a trained model to deploy model')
12 |     parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
13 |                         help='which network to use')
14 |     parser.add_argument('--epoch', dest='epoch', help='epoch of trained model',
15 |                         default=0, type=int)
16 |     parser.add_argument('--prefix', dest='prefix', help='trained model prefix',
17 |                         default=os.path.join(os.getcwd(), 'model', 'ssd_'), type=str)
18 |     parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
19 |                         help='data shape')
20 |     parser.add_argument('--num-class', dest='num_classes', help='number of classes',
21 |                         default=20, type=int)
22 |     parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.5,
23 |                         help='non-maximum suppression threshold, default 0.5')
24 |     parser.add_argument('--force', dest='force_nms', type=bool, default=True,
25 |                         help='force non-maximum suppression on different class')
26 |     parser.add_argument('--topk', dest='nms_topk', type=int, default=400,
27 |                         help='apply nms only to top k detections based on scores.')
28 |     args = parser.parse_args()
29 |     return args
30 | 
if __name__ == '__main__':
    args = parse_args()
    # build the symbol that will be stored next to the trained weights
    net = get_symbol(args.network, args.data_shape,
        num_classes=args.num_classes, nms_thresh=args.nms_thresh,
        force_suppress=args.force_nms, nms_topk=args.nms_topk)
    # a prefix ending in '_' is a stem that gets the network name and
    # data shape appended; anything else is used verbatim
    if args.prefix.endswith('_'):
        prefix = args.prefix + args.network + '_' + str(args.data_shape)
    else:
        prefix = args.prefix
    _, arg_params, aux_params = mx.model.load_checkpoint(prefix, args.epoch)
    # new name: prepend 'deploy_' to the file-name part of the prefix.
    # Splitting with os.path also covers a prefix without any directory
    # component, where rsplit('/')/join left the name unchanged and the
    # save below overwrote the checkpoint that was just loaded.
    save_prefix = os.path.join(os.path.dirname(prefix),
                               'deploy_' + os.path.basename(prefix))
    mx.model.save_checkpoint(save_prefix, args.epoch, net, arg_params, aux_params)
    print("Saved model: {}-{:04d}.params".format(save_prefix, args.epoch))
    print("Saved symbol: {}-symbol.json".format(save_prefix))
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # User defined
  2 | data/*
  3 | !data/demo/
  4 | model/*
  5 | !model/README.md
  6 | cache/*
  7 | tools/caffe_converter/*.params
  8 | tools/caffe_converter/*.json
  9 | tools/caffe_converter/*.prototxt
 10 | tools/caffe_converter/*.caffemodel
 11 | .DS_Store
 12 | 
 13 | # Compiled Object files
 14 | *.slo
 15 | *.lo
 16 | *.o
 17 | *.obj
 18 | *.d
 19 | 
 20 | # Precompiled Headers
 21 | *.gch
 22 | *.pch
 23 | 
 24 | # Compiled Dynamic libraries
 25 | *.so
 26 | *.dylib
 27 | *.dll
 28 | 
 29 | # Fortran module files
 30 | *.mod
 31 | 
 32 | # Compiled Static libraries
 33 | *.lai
 34 | *.la
 35 | *.a
 36 | *.lib
 37 | 
 38 | # Executables
 39 | *.exe
 40 | *.out
 41 | *.app
 42 | *~
 43 | 
 44 | *.pyc
 45 | .Rhistory
 46 | *log
 47 | Debug
 48 | *suo
 49 | tracker
 50 | 
 51 | # vim
 52 | *.swp
 53 | *.swo
 54 | *.swn
 55 | .vimrc
 56 | .ycm_extra_conf.py
 57 | .ycm_extra_conf.pyc
 58 | 
 59 | # Byte-compiled / optimized / DLL files
 60 | __pycache__/
 61 | *.py[cod]
 62 | *$py.class
 63 | 
 64 | # C extensions
 65 | *.so
 66 | 
 67 | # Distribution / packaging
 68 | .Python
 69 | env/
 70 | build/
 71 | develop-eggs/
 72 | dist/
 73 | downloads/
 74 | eggs/
 75 | .eggs/
 76 | lib/
 77 | lib64/
 78 | parts/
 79 | sdist/
 80 | var/
 81 | *.egg-info/
 82 | .installed.cfg
 83 | *.egg
 84 | 
 85 | # PyInstaller
 86 | #  Usually these files are written by a python script from a template
 87 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 88 | *.manifest
 89 | *.spec
 90 | 
 91 | # Installer logs
 92 | pip-log.txt
 93 | pip-delete-this-directory.txt
 94 | 
 95 | # Unit test / coverage reports
 96 | htmlcov/
 97 | .tox/
 98 | .coverage
 99 | .coverage.*
100 | .cache
101 | nosetests.xml
102 | coverage.xml
103 | *,cover
104 | .hypothesis/
105 | 
106 | # Translations
107 | *.mo
108 | *.pot
109 | 
110 | # Django stuff:
111 | *.log
112 | local_settings.py
113 | 
114 | # Flask stuff:
115 | instance/
116 | .webassets-cache
117 | 
118 | # Scrapy stuff:
119 | .scrapy
120 | 
121 | # Sphinx documentation
122 | docs/_build/
123 | 
124 | # PyBuilder
125 | target/
126 | 
127 | # IPython Notebook
128 | .ipynb_checkpoints
129 | 
130 | # pyenv
131 | .python-version
132 | 
133 | # celery beat schedule file
134 | celerybeat-schedule
135 | 
136 | # dotenv
137 | .env
138 | 
139 | # virtualenv
140 | venv/
141 | ENV/
142 | 
143 | # ide project settings
144 | .spyderproject
145 | .idea
146 | 
147 | # Rope project settings
148 | .ropeproject
149 | 


--------------------------------------------------------------------------------
/train/metric.py:
--------------------------------------------------------------------------------
 1 | import mxnet as mx
 2 | import numpy as np
 3 | 
 4 | 
 5 | class MultiBoxMetric(mx.metric.EvalMetric):
 6 |     """Calculate metrics for Multibox training """
 7 |     def __init__(self, eps=1e-8):
 8 |         super(MultiBoxMetric, self).__init__('MultiBox')
 9 |         self.eps = eps
10 |         self.num = 2
11 |         self.name = ['CrossEntropy', 'SmoothL1']
12 |         self.reset()
13 | 
14 |     def reset(self):
15 |         """
16 |         override reset behavior
17 |         """
18 |         if getattr(self, 'num', None) is None:
19 |             self.num_inst = 0
20 |             self.sum_metric = 0.0
21 |         else:
22 |             self.num_inst = [0] * self.num
23 |             self.sum_metric = [0.0] * self.num
24 | 
25 |     def update(self, labels, preds):
26 |         """
27 |         Implementation of updating metrics
28 |         """
29 |         # get generated multi label from network
30 |         cls_prob = preds[0].asnumpy()
31 |         loc_loss = preds[1].asnumpy()
32 |         cls_label = preds[2].asnumpy()
33 |         valid_count = np.sum(cls_label >= 0)
34 |         # overall accuracy & object accuracy
35 |         label = cls_label.flatten()
36 |         # in case you have a 'other' class
37 |         label[np.where(label >= cls_prob.shape[1])] = 0
38 |         mask = np.where(label >= 0)[0]
39 |         indices = np.int64(label[mask])
40 |         prob = cls_prob.transpose((0, 2, 1)).reshape((-1, cls_prob.shape[1]))
41 |         prob = prob[mask, indices]
42 |         self.sum_metric[0] += (-np.log(prob + self.eps)).sum()
43 |         self.num_inst[0] += valid_count
44 |         # smoothl1loss
45 |         self.sum_metric[1] += np.sum(loc_loss)
46 |         self.num_inst[1] += valid_count
47 | 
48 |     def get(self):
49 |         """Get the current evaluation result.
50 |         Override the default behavior
51 | 
52 |         Returns
53 |         -------
54 |         name : str
55 |            Name of the metric.
56 |         value : float
57 |            Value of the evaluation.
58 |         """
59 |         if self.num is None:
60 |             if self.num_inst == 0:
61 |                 return (self.name, float('nan'))
62 |             else:
63 |                 return (self.name, self.sum_metric / self.num_inst)
64 |         else:
65 |             names = ['%s'%(self.name[i]) for i in range(self.num)]
66 |             values = [x / y if y != 0 else float('nan') \
67 |                 for x, y in zip(self.sum_metric, self.num_inst)]
68 |             return (names, values)
69 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/README.md:
--------------------------------------------------------------------------------
 1 | # Convert Caffe Model to Mxnet Format
 2 | 
 3 | ### Build (Linux)
 4 | 
 5 | Either [Caffe's python package](http://caffe.berkeleyvision.org/installation.html) or [Google protobuf](https://developers.google.com/protocol-buffers/?hl=en) is required. The latter is often much easier to install:  
 6 | 
 7 | 1. We first install the protobuf compiler. If you compiled mxnet with `USE_DIST_KVSTORE = 1` then it is already built. Otherwise, install `protobuf-compiler` with your favorite package manager, e.g. `sudo apt-get install protobuf-compiler` for Ubuntu and `sudo yum install protobuf-compiler` for Red Hat/Fedora.
 8 | 
 9 | 2. Then install the protobuf's python binding. For example `sudo pip install protobuf`
10 | 
11 | Now we can build the tool by running `make` in the current directory.
12 | 
13 | ### Build (Windows)
14 | 
15 | Note: this tool currently only works on python 2.
16 | 
17 | We must make sure that the installed python binding and protobuf compiler are using the same version of protobuf,
18 | so we install the bindings first, and then install the corresponding compiler.
19 | 
20 | 1. Install the protobuf bindings. At time of writing, the conda package manager has the most up to date version. Either run `conda install -c conda-forge protobuf` or `pip install protobuf`
21 | 2. Download the win32 build of protoc from [Protocol Buffers Releases](https://github.com/google/protobuf/releases). Make sure to download the version that corresponds to the version of the bindings. Extract to any location then add that location to your `PATH`
22 | 3. Run `make_win32.bat` to build the package
23 | 
24 | 
25 | ### How to use
26 | To convert SSD caffemodels, use: `python convert_model.py prototxt caffemodel outputprefix`
27 | 
28 | Linux: Use `./run.sh model_name` to download and convert a model. E.g. `./run.sh vgg19`
29 | 
30 | Windows: Use `python convert_model.py prototxt caffemodel outputprefix`  
31 | For example: `python convert_model.py VGG_ILSVRC_16_layers_deploy.prototxt VGG_ILSVRC_16_layers.caffemodel vgg16`
32 | 
33 | 
34 | ### Note
35 | 
36 | * We have verified the results of VGG_16/VGG_19 model and BVLC_googlenet results from Caffe model zoo.
37 | * The tool only supports single input and single output network.
38 | * The tool can only work with the L2LayerParameter in Caffe.
39 | * Caffe uses a convention for multi-strided pooling output shape inconsistent with MXNet
40 |     * This importer doesn't handle this problem properly yet
41 |     * An example of this failure is importing bvlc_Googlenet. The user needs to add padding to stride-2 pooling to make this work right now.
42 | 


--------------------------------------------------------------------------------
/config/config.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import os
 3 | from .utils import DotDict, namedtuple_with_defaults, zip_namedtuple, config_as_dict
 4 | 
# Parameter bundle for one random-crop sampler. The default values are listed
# positionally, in the same order as the field names.
RandCropper = namedtuple_with_defaults('RandCropper',
    'min_crop_scales, max_crop_scales, \
    min_crop_aspect_ratios, max_crop_aspect_ratios, \
    min_crop_overlaps, max_crop_overlaps, \
    min_crop_sample_coverages, max_crop_sample_coverages, \
    min_crop_object_coverages, max_crop_object_coverages, \
    max_crop_trials',
    [0.0, 1.0,
    0.5, 2.0,
    0.0, 1.0,
    0.0, 1.0,
    0.0, 1.0,
    25])

# Parameter bundle for random padding.
RandPadder = namedtuple_with_defaults('RandPadder',
    'rand_pad_prob, max_pad_scale, fill_value',
    [0.0, 1.0, 127])

# Parameter bundle for color jitter; every probability defaults to 0.0.
ColorJitter = namedtuple_with_defaults('ColorJitter',
    'random_hue_prob, max_random_hue, \
    random_saturation_prob, max_random_saturation, \
    random_illumination_prob, max_random_illumination, \
    random_contrast_prob, max_random_contrast',
    [0.0, 18,
    0.0, 32,
    0.0, 32,
    0.0, 0.5])


# Global configuration object; ROOT_DIR is the parent of this file's directory.
cfg = DotDict()
cfg.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# training configs
cfg.train = DotDict()
# random cropping samplers: all share min_crop_scales=0.3 and differ only in
# min_crop_overlaps
cfg.train.rand_crop_samplers = [
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.1),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.3),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.5),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.7),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.9),]
cfg.train.crop_emit_mode = 'center'
# cfg.train.emit_overlap_thresh = 0.4
# random padding
cfg.train.rand_pad = RandPadder(rand_pad_prob=0.5, max_pad_scale=4.0)
# random color jitter
cfg.train.color_jitter = ColorJitter(random_hue_prob=0.5, random_saturation_prob=0.5,
    random_illumination_prob=0.5, random_contrast_prob=0.5)
cfg.train.inter_method = 10  # random interpolation
cfg.train.rand_mirror_prob = 0.5
cfg.train.shuffle = True
cfg.train.seed = 233
cfg.train.preprocess_threads = 48
# Must run after every cfg.train.* assignment above: config_as_dict flattens
# the namedtuples into plain keys and replaces the DotDict with a normal dict.
cfg.train = config_as_dict(cfg.train)  # convert to normal dict

# validation: no crop samplers; padding and color jitter keep their defaults
cfg.valid = DotDict()
cfg.valid.rand_crop_samplers = []
cfg.valid.rand_pad = RandPadder()
cfg.valid.color_jitter = ColorJitter()
cfg.valid.rand_mirror_prob = 0
cfg.valid.shuffle = False
cfg.valid.seed = 0
cfg.valid.preprocess_threads = 32
# Same flattening step as for the training section.
cfg.valid = config_as_dict(cfg.valid)  # convert to normal dict
70 | 


--------------------------------------------------------------------------------
/symbol/README.md:
--------------------------------------------------------------------------------
 1 | ## How to compose SSD network on top of mainstream classification networks
 2 | 
 3 | 1. Have the base network ready in this directory as `name.py`, such as `inceptionv3.py`.
 4 | 2. Add configuration to `symbol_factory.py`, an example would be:
 5 | ```
 6 | if network == 'vgg16_reduced':
 7 |     if data_shape >= 448:
 8 |         from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
 9 |         num_filters = [512, -1, 512, 256, 256, 256, 256]
10 |         strides = [-1, -1, 2, 2, 2, 2, 1]
11 |         pads = [-1, -1, 1, 1, 1, 1, 1]
12 |         sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
13 |             [.75, .8216], [.9, .9721]]
14 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
15 |             [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
16 |         normalizations = [20, -1, -1, -1, -1, -1, -1]
17 |         steps = [] if data_shape != 512 else [x / 512.0 for x in
18 |             [8, 16, 32, 64, 128, 256, 512]]
19 |     else:
20 |         from_layers = ['relu4_3', 'relu7', '', '', '', '']
21 |         num_filters = [512, -1, 512, 256, 256, 256]
22 |         strides = [-1, -1, 2, 2, 1, 1]
23 |         pads = [-1, -1, 1, 1, 0, 0]
24 |         sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
25 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
26 |             [1,2,.5], [1,2,.5]]
27 |         normalizations = [20, -1, -1, -1, -1, -1]
28 |         steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
29 |     return locals()
30 | elif network == 'inceptionv3':
31 |     from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
32 |     num_filters = [-1, -1, 512, 256, 256, 128]
33 |     strides = [-1, -1, 2, 2, 2, 2]
34 |     pads = [-1, -1, 1, 1, 1, 1]
35 |     sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
36 |     ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
37 |         [1,2,.5], [1,2,.5]]
38 |     normalizations = -1
39 |     steps = []
40 |     return locals()
41 | ```
42 | Here `from_layers` indicates the feature layers you would like to extract from the base network.
43 | `''` indicates that we want to add extra new layers on top of the last feature layer,
44 | and the number of filters must be specified in `num_filters`. Similarly, `strides` and `pads`
45 | are required to compose these new layers. `sizes` and `ratios` are the parameters controlling
46 | the anchor generation algorithm. `normalizations` is used to normalize and rescale feature if
47 | not `-1`. `steps`: optional, used to calculate the anchor sliding steps.
48 | 
49 | 3. Train or test with arguments `--network name --data-shape xxx --pretrained pretrained_model`
50 | 


--------------------------------------------------------------------------------
/tools/visualize_net.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import find_mxnet
 3 | import mxnet as mx
 4 | import argparse
 5 | import sys, os
 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
 7 | import symbol_factory
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='network visualization')
11 |     parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
12 |                         help='the cnn to use')
13 |     parser.add_argument('--num-classes', dest='num_classes', type=int, default=20,
14 |                         help='the number of classes')
15 |     parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
16 |                         help='set image\'s shape')
17 |     parser.add_argument('--train', dest='train', type=bool, default=False, help='show train net')
18 |     parser.add_argument('--output-dir', dest='output_dir', type=str, default=os.path.dirname(__file__),
19 |                         help='path of the output visualized net')
20 |     parser.add_argument('--print-net', dest='print_net', type=bool, default=False,
21 |                         help='print the network as json')
22 |     args = parser.parse_args()
23 |     return args
24 | 
def net_visualization(network=None,
                      num_classes=None,
                      data_shape=None,
                      train=None,
                      output_dir=None,
                      print_net=False,
                      net=None):
    """
    Plot a network with mx.viz.plot_network and render it into output_dir.

    When ``net`` is not supplied, the symbol is obtained from symbol_factory:
    get_symbol_train if ``train`` is truthy, get_symbol otherwise. The output
    file name is built from ``network`` (and ``data_shape`` for the test
    net). With ``print_net`` set, the symbol is also printed as json.
    """
    # a caller-supplied net is plotted as is
    if net is None:
        build = symbol_factory.get_symbol_train if train else symbol_factory.get_symbol
        net = build(network, data_shape, num_classes=num_classes)

    node_style = {"shape": 'rect', "fixedsize": 'false'}
    # only the test net is plotted with an explicit input shape
    input_shape = None if train else {"data": (1, 3, data_shape, data_shape)}
    graph = mx.viz.plot_network(net, shape=input_shape, node_attrs=node_style)
    if train:
        filename = "ssd_" + network + '_' + 'train'
    else:
        filename = "ssd_" + network + '_' + str(data_shape)+'_'+'test'

    graph.render(os.path.join(output_dir, filename))
    if print_net:
        print(net.tojson())
51 | 
if __name__ == '__main__':
    # forward each parsed command-line option to the renderer by keyword
    cli = parse_args()
    option_names = ('network', 'num_classes', 'data_shape',
                    'train', 'output_dir', 'print_net')
    net_visualization(**{key: getattr(cli, key) for key in option_names})
60 | 


--------------------------------------------------------------------------------
/tools/image_processing.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | 
 4 | def rescale(im, target_size, max_size):
 5 |     """
 6 |     only resize input image to target size and return scale
 7 | 
 8 |     Parameters:
 9 |     ----------
10 |     im : numpy.array
11 |         BGR image input by opencv
12 |     target_size: int
13 |         one dimensional size (the short side)
14 |     max_size: int
15 |         one dimensional max size (the long side)
16 | 
17 |     Returns:
18 |     ----------
19 |     numpy.array, rescaled image
20 |     """
21 |     im_shape = im.shape
22 |     im_size_min = np.min(im_shape[0:2])
23 |     im_size_max = np.min(im_shape[0:2])
24 |     im_scale = float(target_size) / float(im_size_min)
25 |     # prevent bigger axis from being more than max_size:
26 |     if np.round(im_scale * im_size_max) > max_size:
27 |         im_scale = float(max_size) / float(im_size_max)
28 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
29 |     return im, im_scale
30 | 
def resize(im, target_size, interp_method=cv2.INTER_LINEAR):
    """
    stretch an image to an exact size, ignoring its aspect ratio

    Parameters:
    ----------
    im : numpy.array
        BGR image input by opencv
    target_size : tuple (int, int)
        two dimensional output size, handed unchanged to cv2.resize as its
        ``dsize`` argument
        NOTE(review): OpenCV documents dsize as (width, height) - confirm
        that callers do not pass (h, w)
    interp_method : int
        OpenCV interpolation flag, cv2.INTER_LINEAR by default

    Returns:
    ----------
    numpy.array, resized image
    """
    resized = cv2.resize(im, target_size, interpolation=interp_method)
    return resized
46 | 
def transform(im, pixel_means):
    """
    convert an opencv image into a mean-subtracted, channel-first float array

    The first and third channels are swapped (BGR -> RGB), the pixel means
    are subtracted, and the channel axis is moved to the front. The input
    array is left untouched.

    Parameters:
    ----------
    im : numpy.array
        [height, width, channel] in BGR
    pixel_means : list
        [[[R, G, B pixel means]]]

    Returns:
    ----------
    numpy.array as in shape [channel, height, width]
    """
    # astype always copies, so the caller's image is never modified
    rgb = im.astype(float)
    rgb[:, :, (0, 1, 2)] = rgb[:, :, (2, 1, 0)]
    rgb -= pixel_means
    # put channel first
    return rgb.transpose((2, 0, 1))
71 | 
72 | 
def transform_inverse(im_tensor, pixel_means):
    """
    turn a single-image mxnet tensor back into an ordinary RGB image
    by moving channels last, adding the pixel means and casting to uint8

    Parameters:
    ----------
    im_tensor : numpy.array
        in shape [batch, channel, height, width], batch must be 1
    pixel_means: list
        [[[R, G, B pixel means]]]

    Returns:
    ----------
    im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 1
    # work on a copy so the in-place addition below never touches the input;
    # put channel back and take the only image of the batch
    restored = im_tensor.copy().transpose((0, 2, 3, 1))[0]
    assert restored.shape[2] == 3
    restored += pixel_means
    return restored.astype(np.uint8)
99 | 


--------------------------------------------------------------------------------
/config/utils.py:
--------------------------------------------------------------------------------
 1 | import collections
 2 | 
 3 | class DotDict(dict):
 4 |     """
 5 |     Simple class for dot access elements in dict, support nested initialization
 6 |     Example:
 7 |     d = DotDict({'child': 'dotdict'}, name='dotdict', index=1, contents=['a', 'b'])
 8 |     # add new key
 9 |     d.new_key = '!' # or d['new_key'] = '!'
10 |     # update values
11 |     d.new_key = '!!!'
12 |     # delete keys
13 |     del d.new_key
14 |     """
15 |     def __init__(self, *args, **kwargs):
16 |         super(DotDict, self).__init__(*args, **kwargs)
17 |         for arg in args:
18 |             if isinstance(arg, dict):
19 |                 for k, v in arg.items():
20 |                     self[k] = v
21 | 
22 |         if kwargs:
23 |             for k, v in kwargs.items():
24 |                 self[k] = v
25 | 
26 |     def __getattr__(self, attr):
27 |         return self.get(attr)
28 | 
29 |     def __setattr__(self, key, value):
30 |         self.__setitem__(key, value)
31 | 
32 |     def __setitem__(self, key, value):
33 |         super(DotDict, self).__setitem__(key, value)
34 |         self.__dict__.update({key: value})
35 | 
36 |     def __delattr__(self, item):
37 |         self.__delitem__(item)
38 | 
39 |     def __delitem__(self, key):
40 |         super(DotDict, self).__delitem__(key)
41 |         del self.__dict__[key]
42 | 
43 | 
def namedtuple_with_defaults(typename, field_names, default_values=()):
    """ create a namedtuple with default values

    Parameters:
    ----------
    typename : str
        name of the generated namedtuple class
    field_names : str or sequence of str
        field names, passed on to collections.namedtuple
    default_values : sequence or mapping
        defaults given positionally in field order, or by field name;
        fields without a default fall back to None

    Returns:
    ----------
    the namedtuple class
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc (the fallback keeps Python 2 working)
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping
    T = collections.namedtuple(typename, field_names)
    # start with None for every field so a partial prototype can be built
    T.__new__.__defaults__ = (None, ) * len(T._fields)
    if isinstance(default_values, Mapping):
        prototype = T(**default_values)
    else:
        prototype = T(*default_values)
    T.__new__.__defaults__ = tuple(prototype)
    return T
54 | 
def merge_dict(a, b):
    """ return a copy of a updated with b; on shared keys b wins, inputs stay untouched """
    merged = a.copy()
    merged.update(b)
    return merged
60 | 
def zip_namedtuple(nt_list):
    """ accept a namedtuple or a list of them, return a dict mapping each field to the list of its values

    A falsy input gives an empty dict; all entries must share one type.
    """
    if not nt_list:
        return dict()
    if not isinstance(nt_list, list):
        nt_list = [nt_list]
    first = nt_list[0]
    for nt in nt_list:
        assert type(nt) == type(first)
    # one empty column per field, then fill the columns row by row
    zipped = {field: [] for field in first._asdict()}
    for nt in nt_list:
        for field, value in nt._asdict().items():
            zipped[field].append(value)
    return zipped
74 | 
def config_as_dict(cfg):
    """ flatten a raw configuration section into one plain dictionary

    The 'rand_crop_samplers', 'rand_pad' and 'color_jitter' entries are
    replaced by the fields of the namedtuples they hold, and
    'num_crop_sampler' / 'rand_crop_prob' are derived from the sampler count.
    """
    flat = cfg.__dict__.copy()
    # random cropping params: one list per sampler field
    del flat['rand_crop_samplers']
    samplers = cfg.rand_crop_samplers
    assert isinstance(samplers, list)
    flat = merge_dict(flat, zip_namedtuple(samplers))
    sampler_count = len(samplers)
    flat['num_crop_sampler'] = sampler_count  # must specify the #
    flat['rand_crop_prob'] = 1.0 / (sampler_count + 1) * sampler_count
    # random padding params, then color jitter
    for section in ('rand_pad', 'color_jitter'):
        del flat[section]
        flat = merge_dict(flat, getattr(cfg, section)._asdict())
    return flat
92 | 


--------------------------------------------------------------------------------
/dataset/imdb.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os.path as osp
  3 | 
  4 | class Imdb(object):
  5 |     """
  6 |     Base class for dataset loading
  7 | 
  8 |     Parameters:
  9 |     ----------
 10 |     name : str
 11 |         name of dataset
 12 |     """
 13 |     def __init__(self, name):
 14 |         self.name = name
 15 |         self.classes = []
 16 |         self.num_classes = 0
 17 |         self.image_set_index = None
 18 |         self.num_images = 0
 19 |         self.labels = None
 20 |         self.padding = 0
 21 | 
 22 |     def image_path_from_index(self, index):
 23 |         """
 24 |         load image full path given specified index
 25 | 
 26 |         Parameters:
 27 |         ----------
 28 |         index : int
 29 |             index of image requested in dataset
 30 | 
 31 |         Returns:
 32 |         ----------
 33 |         full path of specified image
 34 |         """
 35 |         raise NotImplementedError
 36 | 
 37 |     def label_from_index(self, index):
 38 |         """
 39 |         load ground-truth of image given specified index
 40 | 
 41 |         Parameters:
 42 |         ----------
 43 |         index : int
 44 |             index of image requested in dataset
 45 | 
 46 |         Returns:
 47 |         ----------
 48 |         object ground-truths, in format
 49 |         numpy.array([id, xmin, ymin, xmax, ymax]...)
 50 |         """
 51 |         raise NotImplementedError
 52 | 
 53 |     def save_imglist(self, fname=None, root=None, shuffle=False):
 54 |         """
 55 |         save imglist to disk
 56 | 
 57 |         Parameters:
 58 |         ----------
 59 |         fname : str
 60 |             saved filename
 61 |         """
 62 |         def progress_bar(count, total, suffix=''):
 63 |             import sys
 64 |             bar_len = 24
 65 |             filled_len = int(round(bar_len * count / float(total)))
 66 | 
 67 |             percents = round(100.0 * count / float(total), 1)
 68 |             bar = '=' * filled_len + '-' * (bar_len - filled_len)
 69 |             sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
 70 |             sys.stdout.flush()
 71 | 
 72 |         str_list = []
 73 |         for index in range(self.num_images):
 74 |             progress_bar(index, self.num_images)
 75 |             label = self.label_from_index(index)
 76 |             if label.size < 1:
 77 |                 continue
 78 |             path = self.image_path_from_index(index)
 79 |             if root:
 80 |                 path = osp.relpath(path, root)
 81 |             str_list.append('\t'.join([str(index), str(2), str(label.shape[1])] \
 82 |               + ["{0:.4f}".format(x) for x in label.ravel()] + [path,]) + '\n')
 83 |         if str_list:
 84 |             if shuffle:
 85 |                 import random
 86 |                 random.shuffle(str_list)
 87 |             if not fname:
 88 |                 fname = self.name + '.lst'
 89 |             with open(fname, 'w') as f:
 90 |                 for line in str_list:
 91 |                     f.write(line)
 92 |         else:
 93 |             raise RuntimeError("No image in imdb")
 94 | 
 95 |     def _load_class_names(self, filename, dirname):
 96 |         """
 97 |         load class names from text file
 98 | 
 99 |         Parameters:
100 |         ----------
101 |         filename: str
102 |             file stores class names
103 |         dirname: str
104 |             file directory
105 |         """
106 |         full_path = osp.join(dirname, filename)
107 |         classes = []
108 |         with open(full_path, 'r') as f:
109 |             classes = [l.strip() for l in f.readlines()]
110 |         return classes
111 | 


--------------------------------------------------------------------------------
/dataset/concat_db.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from .imdb import Imdb
  3 | import random
  4 | 
  5 | class ConcatDB(Imdb):
  6 |     """
  7 |     ConcatDB is used to concatenate multiple imdbs to form a larger db.
  8 |     It is very useful to combine multiple dataset with same classes.
  9 |     Parameters
 10 |     ----------
 11 |     imdbs : Imdb or list of Imdb
 12 |         Imdbs to be concatenated
 13 |     shuffle : bool
 14 |         whether to shuffle the initial list
 15 |     """
 16 |     def __init__(self, imdbs, shuffle):
 17 |         super(ConcatDB, self).__init__('concatdb')
 18 |         if not isinstance(imdbs, list):
 19 |             imdbs = [imdbs]
 20 |         self.imdbs = imdbs
 21 |         self._check_classes()
 22 |         self.image_set_index = self._load_image_set_index(shuffle)
 23 | 
 24 |     def _check_classes(self):
 25 |         """
 26 |         check input imdbs, make sure they have same classes
 27 |         """
 28 |         try:
 29 |             self.classes = self.imdbs[0].classes
 30 |             self.num_classes = len(self.classes)
 31 |         except AttributeError:
 32 |             # fine, if no classes is provided
 33 |             pass
 34 | 
 35 |         if self.num_classes > 0:
 36 |             for db in self.imdbs:
 37 |                 assert self.classes == db.classes, "Multiple imdb must have same classes"
 38 | 
 39 |     def _load_image_set_index(self, shuffle):
 40 |         """
 41 |         get total number of images, init indices
 42 | 
 43 |         Parameters
 44 |         ----------
 45 |         shuffle : bool
 46 |             whether to shuffle the initial indices
 47 |         """
 48 |         self.num_images = 0
 49 |         for db in self.imdbs:
 50 |             self.num_images += db.num_images
 51 |         indices = list(range(self.num_images))
 52 |         if shuffle:
 53 |             random.shuffle(indices)
 54 |         return indices
 55 | 
 56 |     def _locate_index(self, index):
 57 |         """
 58 |         given index, find out sub-db and sub-index
 59 | 
 60 |         Parameters
 61 |         ----------
 62 |         index : int
 63 |             index of a specific image
 64 | 
 65 |         Returns
 66 |         ----------
 67 |         a tuple (sub-db, sub-index)
 68 |         """
 69 |         assert index >= 0 and index < self.num_images, "index out of range"
 70 |         pos = self.image_set_index[index]
 71 |         for k, v in enumerate(self.imdbs):
 72 |             if pos >= v.num_images:
 73 |                 pos -= v.num_images
 74 |             else:
 75 |                 return (k, pos)
 76 | 
 77 |     def image_path_from_index(self, index):
 78 |         """
 79 |         given image index, find out full path
 80 | 
 81 |         Parameters
 82 |         ----------
 83 |         index: int
 84 |             index of a specific image
 85 | 
 86 |         Returns
 87 |         ----------
 88 |         full path of this image
 89 |         """
 90 |         assert self.image_set_index is not None, "Dataset not initialized"
 91 |         pos = self.image_set_index[index]
 92 |         n_db, n_index = self._locate_index(index)
 93 |         return self.imdbs[n_db].image_path_from_index(n_index)
 94 | 
 95 |     def label_from_index(self, index):
 96 |         """
 97 |         given image index, return preprocessed ground-truth
 98 | 
 99 |         Parameters
100 |         ----------
101 |         index: int
102 |             index of a specific image
103 | 
104 |         Returns
105 |         ----------
106 |         ground-truths of this image
107 |         """
108 |         assert self.image_set_index is not None, "Dataset not initialized"
109 |         pos = self.image_set_index[index]
110 |         n_db, n_index = self._locate_index(index)
111 |         return self.imdbs[n_db].label_from_index(n_index)
112 | 


--------------------------------------------------------------------------------
/dataset/mscoco.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import os
  3 | import numpy as np
  4 | from .imdb import Imdb
  5 | from .pycocotools.coco import COCO
  6 | 
  7 | 
  8 | class Coco(Imdb):
  9 |     """
 10 |     Implementation of Imdb for MSCOCO dataset: https://http://mscoco.org
 11 | 
 12 |     Parameters:
 13 |     ----------
 14 |     anno_file : str
 15 |         annotation file for coco, a json file
 16 |     image_dir : str
 17 |         image directory for coco images
 18 |     shuffle : bool
 19 |         whether initially shuffle image list
 20 | 
 21 |     """
 22 |     def __init__(self, anno_file, image_dir, shuffle=True, names='mscoco.names'):
 23 |         assert os.path.isfile(anno_file), "Invalid annotation file: " + anno_file
 24 |         basename = os.path.splitext(os.path.basename(anno_file))[0]
 25 |         super(Coco, self).__init__('coco_' + basename)
 26 |         self.image_dir = image_dir
 27 | 
 28 |         self.classes = self._load_class_names(names,
 29 |             os.path.join(os.path.dirname(__file__), 'names'))
 30 | 
 31 |         self.num_classes = len(self.classes)
 32 |         self._load_all(anno_file, shuffle)
 33 |         self.num_images = len(self.image_set_index)
 34 | 
 35 | 
 36 |     def image_path_from_index(self, index):
 37 |         """
 38 |         given image index, find out full path
 39 | 
 40 |         Parameters:
 41 |         ----------
 42 |         index: int
 43 |             index of a specific image
 44 |         Returns:
 45 |         ----------
 46 |         full path of this image
 47 |         """
 48 |         assert self.image_set_index is not None, "Dataset not initialized"
 49 |         name = self.image_set_index[index]
 50 |         image_file = os.path.join(self.image_dir, 'images', name)
 51 |         assert os.path.isfile(image_file), 'Path does not exist: {}'.format(image_file)
 52 |         return image_file
 53 | 
 54 |     def label_from_index(self, index):
 55 |         """
 56 |         given image index, return preprocessed ground-truth
 57 | 
 58 |         Parameters:
 59 |         ----------
 60 |         index: int
 61 |             index of a specific image
 62 |         Returns:
 63 |         ----------
 64 |         ground-truths of this image
 65 |         """
 66 |         assert self.labels is not None, "Labels not processed"
 67 |         return self.labels[index]
 68 | 
 69 |     def _load_all(self, anno_file, shuffle):
 70 |         """
 71 |         initialize all entries given annotation json file
 72 | 
 73 |         Parameters:
 74 |         ----------
 75 |         anno_file: str
 76 |             annotation json file
 77 |         shuffle: bool
 78 |             whether to shuffle image list
 79 |         """
 80 |         image_set_index = []
 81 |         labels = []
 82 |         coco = COCO(anno_file)
 83 |         img_ids = coco.getImgIds()
 84 |         for img_id in img_ids:
 85 |             # filename
 86 |             image_info = coco.loadImgs(img_id)[0]
 87 |             filename = image_info["file_name"]
 88 |             subdir = filename.split('_')[1]
 89 |             height = image_info["height"]
 90 |             width = image_info["width"]
 91 |             # label
 92 |             anno_ids = coco.getAnnIds(imgIds=img_id)
 93 |             annos = coco.loadAnns(anno_ids)
 94 |             label = []
 95 |             for anno in annos:
 96 |                 cat_id = int(anno["category_id"])
 97 |                 bbox = anno["bbox"]
 98 |                 assert len(bbox) == 4
 99 |                 xmin = float(bbox[0]) / width
100 |                 ymin = float(bbox[1]) / height
101 |                 xmax = xmin + float(bbox[2]) / width
102 |                 ymax = ymin + float(bbox[3]) / height
103 |                 label.append([cat_id, xmin, ymin, xmax, ymax, 0])
104 |             if label:
105 |                 labels.append(np.array(label))
106 |                 image_set_index.append(os.path.join(subdir, filename))
107 | 
108 |         if shuffle:
109 |             import random
110 |             indices = list(range(len(image_set_index)))
111 |             random.shuffle(indices)
112 |             image_set_index = [image_set_index[i] for i in indices]
113 |             labels = [labels[i] for i in indices]
114 |         # store the results
115 |         self.image_set_index = image_set_index
116 |         self.labels = labels
117 | 


--------------------------------------------------------------------------------
/evaluate/evaluate_net.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | import sys
  4 | import importlib
  5 | import mxnet as mx
  6 | from dataset.iterator import DetRecordIter
  7 | from config.config import cfg
  8 | from evaluate.eval_metric import MApMetric, VOC07MApMetric
  9 | import logging
 10 | from symbol.symbol_factory import get_symbol
 11 | 
 12 | def evaluate_net(net, path_imgrec, num_classes, mean_pixels, data_shape,
 13 |                  model_prefix, epoch, ctx=mx.cpu(), batch_size=1,
 14 |                  path_imglist="", nms_thresh=0.45, force_nms=False,
 15 |                  ovp_thresh=0.5, use_difficult=False, class_names=None,
 16 |                  voc07_metric=False, frequent=20):
 17 |     """
 18 |     evalute network given validation record file
 19 | 
 20 |     Parameters:
 21 |     ----------
 22 |     net : str or None
 23 |         Network name or use None to load from json without modifying
 24 |     path_imgrec : str
 25 |         path to the record validation file
 26 |     path_imglist : str
 27 |         path to the list file to replace labels in record file, optional
 28 |     num_classes : int
 29 |         number of classes, not including background
 30 |     mean_pixels : tuple
 31 |         (mean_r, mean_g, mean_b)
 32 |     data_shape : tuple or int
 33 |         (3, height, width) or height/width
 34 |     model_prefix : str
 35 |         model prefix of saved checkpoint
 36 |     epoch : int
 37 |         load model epoch
 38 |     ctx : mx.ctx
 39 |         mx.gpu() or mx.cpu()
 40 |     batch_size : int
 41 |         validation batch size
 42 |     nms_thresh : float
 43 |         non-maximum suppression threshold
 44 |     force_nms : boolean
 45 |         whether suppress different class objects
 46 |     ovp_thresh : float
 47 |         AP overlap threshold for true/false postives
 48 |     use_difficult : boolean
 49 |         whether to use difficult objects in evaluation if applicable
 50 |     class_names : comma separated str
 51 |         class names in string, must correspond to num_classes if set
 52 |     voc07_metric : boolean
 53 |         whether to use 11-point evluation as in VOC07 competition
 54 |     frequent : int
 55 |         frequency to print out validation status
 56 |     """
 57 |     # set up logger
 58 |     logging.basicConfig()
 59 |     logger = logging.getLogger()
 60 |     logger.setLevel(logging.INFO)
 61 | 
 62 |     # args
 63 |     if isinstance(data_shape, int):
 64 |         data_shape = (3, data_shape, data_shape)
 65 |     assert len(data_shape) == 3 and data_shape[0] == 3
 66 |     #model_prefix += '_' + str(data_shape[1])
 67 | 
 68 |     # iterator
 69 |     eval_iter = DetRecordIter(path_imgrec, batch_size, data_shape,
 70 |                               path_imglist=path_imglist, **cfg.valid)
 71 |     # model params
 72 |     load_net, args, auxs = mx.model.load_checkpoint(model_prefix, epoch)
 73 |     # network
 74 |     if net is None:
 75 |         net = load_net
 76 |     else:
 77 |         net = get_symbol(net, data_shape[1], num_classes=num_classes,
 78 |             nms_thresh=nms_thresh, force_suppress=force_nms)
 79 |     if not 'label' in net.list_arguments():
 80 |         label = mx.sym.Variable(name='label')
 81 |         net = mx.sym.Group([net, label])
 82 | 
 83 |     # init module
 84 |     mod = mx.mod.Module(net, label_names=('label',), logger=logger, context=ctx,
 85 |         fixed_param_names=net.list_arguments())
 86 |     mod.bind(data_shapes=eval_iter.provide_data, label_shapes=eval_iter.provide_label)
 87 |     mod.set_params(args, auxs, allow_missing=False, force_init=True)
 88 | 
 89 |     # run evaluation
 90 |     if voc07_metric:
 91 |         metric = VOC07MApMetric(ovp_thresh, use_difficult, class_names,
 92 |                                 roc_output_path=os.path.join(os.path.dirname(model_prefix), 'roc'))
 93 |     else:
 94 |         metric = MApMetric(ovp_thresh, use_difficult, class_names,
 95 |                             roc_output_path=os.path.join(os.path.dirname(model_prefix), 'roc'))
 96 |     results = mod.score(eval_iter, metric, num_batch=None,
 97 |                         batch_end_callback=mx.callback.Speedometer(batch_size,
 98 |                                                                    frequent=frequent,
 99 |                                                                    auto_reset=False))
100 |     for k, v in results:
101 |         print("{}: {}".format(k, v))
102 | 


--------------------------------------------------------------------------------
/symbol/vgg16_reduced.py:
--------------------------------------------------------------------------------
 1 | import mxnet as mx
 2 | 
 3 | def get_symbol(num_classes=1000, **kwargs):
 4 |     """
 5 |     VGG 16 layers network
 6 |     This is a modified version, with fc6/fc7 layers replaced by conv layers
 7 |     And the network is slightly smaller than original VGG 16 network
 8 |     """
 9 |     data = mx.symbol.Variable(name="data")
10 |     label = mx.symbol.Variable(name="label")
11 | 
12 |     # group 1
13 |     conv1_1 = mx.symbol.Convolution(
14 |         data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
15 |     relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
16 |     conv1_2 = mx.symbol.Convolution(
17 |         data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
18 |     relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
19 |     pool1 = mx.symbol.Pooling(
20 |         data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
21 |     # group 2
22 |     conv2_1 = mx.symbol.Convolution(
23 |         data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
24 |     relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
25 |     conv2_2 = mx.symbol.Convolution(
26 |         data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
27 |     relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
28 |     pool2 = mx.symbol.Pooling(
29 |         data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
30 |     # group 3
31 |     conv3_1 = mx.symbol.Convolution(
32 |         data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
33 |     relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
34 |     conv3_2 = mx.symbol.Convolution(
35 |         data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
36 |     relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
37 |     conv3_3 = mx.symbol.Convolution(
38 |         data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
39 |     relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
40 |     pool3 = mx.symbol.Pooling(
41 |         data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
42 |         pooling_convention="full", name="pool3")
43 |     # group 4
44 |     conv4_1 = mx.symbol.Convolution(
45 |         data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
46 |     relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
47 |     conv4_2 = mx.symbol.Convolution(
48 |         data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
49 |     relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
50 |     conv4_3 = mx.symbol.Convolution(
51 |         data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
52 |     relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
53 |     pool4 = mx.symbol.Pooling(
54 |         data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
55 |     # group 5
56 |     conv5_1 = mx.symbol.Convolution(
57 |         data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
58 |     relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
59 |     conv5_2 = mx.symbol.Convolution(
60 |         data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
61 |     relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
62 |     conv5_3 = mx.symbol.Convolution(
63 |         data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
64 |     relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
65 |     pool5 = mx.symbol.Pooling(
66 |         data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
67 |         pad=(1,1), name="pool5")
68 |     # group 6
69 |     conv6 = mx.symbol.Convolution(
70 |         data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
71 |         num_filter=1024, name="fc6")
72 |     relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
73 |     # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
74 |     # group 7
75 |     conv7 = mx.symbol.Convolution(
76 |         data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="fc7")
77 |     relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
78 |     # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
79 | 
80 |     gpool = mx.symbol.Pooling(data=relu7, pool_type='avg', kernel=(7, 7),
81 |         global_pool=True, name='global_pool')
82 |     conv8 = mx.symbol.Convolution(data=gpool, num_filter=num_classes, kernel=(1, 1),
83 |         name='fc8')
84 |     flat = mx.symbol.Flatten(data=conv8)
85 |     softmax = mx.symbol.SoftmaxOutput(data=flat, name='softmax')
86 |     return softmax
87 | 


--------------------------------------------------------------------------------
/symbol/mobilenet.py:
--------------------------------------------------------------------------------
 1 | # Licensed to the Apache Software Foundation (ASF) under one
 2 | # or more contributor license agreements.  See the NOTICE file
 3 | # distributed with this work for additional information
 4 | # regarding copyright ownership.  The ASF licenses this file
 5 | # to you under the Apache License, Version 2.0 (the
 6 | # "License"); you may not use this file except in compliance
 7 | # with the License.  You may obtain a copy of the License at
 8 | #
 9 | #   http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied.  See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | 
18 | import mxnet as mx
19 | 
def Conv(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''):
    """
    bias-free convolution followed by batch norm (fix_gamma=True) and relu

    The three layers are named <name><suffix>_conv2d, <name><suffix>_batchnorm
    and <name><suffix>_relu; the relu symbol is returned.
    """
    prefix = '%s%s' % (name, suffix)
    conv = mx.sym.Convolution(
        data=data, num_filter=num_filter, kernel=kernel, num_group=num_group,
        stride=stride, pad=pad, no_bias=True, name=prefix + '_conv2d')
    bn = mx.sym.BatchNorm(data=conv, name=prefix + '_batchnorm', fix_gamma=True)
    return mx.sym.Activation(data=bn, act_type='relu', name=prefix + '_relu')
25 | 
def get_symbol(num_classes, **kwargs):
    """
    build the MobileNet classification symbol

    A strided 3x3 `conv_1` is followed by 13 depthwise (`conv_N_dw`) /
    pointwise (`conv_N`) pairs, global average pooling, a fully connected
    layer with `num_classes` outputs and a softmax output. `kwargs` is
    accepted and ignored.
    """
    data = mx.symbol.Variable(name="data")
    channels = 32
    net = Conv(data, num_filter=channels, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name="conv_1")

    # (depthwise stride, pointwise output channels) for conv_2 .. conv_14
    plan = (
        (1, 64), (2, 128), (1, 128), (2, 256), (1, 256), (2, 512),
        (1, 512), (1, 512), (1, 512), (1, 512), (1, 512),
        (2, 1024), (1, 1024),
    )
    for idx, (dw_stride, out_channels) in enumerate(plan, start=2):
        # depthwise: one group per input channel, channel count unchanged
        net = Conv(net, num_group=channels, num_filter=channels, kernel=(3, 3),
                   pad=(1, 1), stride=(dw_stride, dw_stride), name="conv_%d_dw" % idx)
        # pointwise: 1x1 convolution that sets the new channel count
        net = Conv(net, num_filter=out_channels, kernel=(1, 1), pad=(0, 0),
                   stride=(1, 1), name="conv_%d" % idx)
        channels = out_channels

    pool = mx.sym.Pooling(data=net, kernel=(7, 7), stride=(1, 1), pool_type="avg", name="global_pool", global_pool=True)
    flatten = mx.sym.Flatten(data=pool, name="flatten")
    fc = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc')
    return mx.symbol.SoftmaxOutput(data=fc, name='softmax')
63 | 


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import tools.find_mxnet
 3 | import mxnet as mx
 4 | import os
 5 | import sys
 6 | from evaluate.evaluate_net import evaluate_net
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Evaluate a network')
11 |     parser.add_argument('--rec-path', dest='rec_path', help='which record file to use',
12 |                         default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
13 |     parser.add_argument('--list-path', dest='list_path', help='which list file to use',
14 |                         default="", type=str)
15 |     parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
16 |                         help='which network to use')
17 |     parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
18 |                         help='evaluation batch size')
19 |     parser.add_argument('--num-class', dest='num_class', type=int, default=20,
20 |                         help='number of classes')
21 |     parser.add_argument('--class-names', dest='class_names', type=str,
22 |                         default='aeroplane, bicycle, bird, boat, bottle, bus, \
23 |                         car, cat, chair, cow, diningtable, dog, horse, motorbike, \
24 |                         person, pottedplant, sheep, sofa, train, tvmonitor',
25 |                         help='string of comma separated names, or text filename')
26 |     parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
27 |                         default=0, type=int)
28 |     parser.add_argument('--prefix', dest='prefix', help='load model prefix',
29 |                         default=os.path.join(os.getcwd(), 'model', 'ssd_'), type=str)
30 |     parser.add_argument('--gpus', dest='gpu_id', help='GPU devices to evaluate with',
31 |                         default='0', type=str)
32 |     parser.add_argument('--cpu', dest='cpu', help='use cpu to evaluate, this can be slow',
33 |                         action='store_true')
34 |     parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
35 |                         help='set image shape')
36 |     parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
37 |                         help='red mean value')
38 |     parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
39 |                         help='green mean value')
40 |     parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
41 |                         help='blue mean value')
42 |     parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.45,
43 |                         help='non-maximum suppression threshold')
44 |     parser.add_argument('--overlap', dest='overlap_thresh', type=float, default=0.5,
45 |                         help='evaluation overlap threshold')
46 |     parser.add_argument('--force', dest='force_nms', type=bool, default=False,
47 |                         help='force non-maximum suppression on different class')
48 |     parser.add_argument('--use-difficult', dest='use_difficult', type=bool, default=False,
49 |                         help='use difficult ground-truths in evaluation')
50 |     parser.add_argument('--voc07', dest='use_voc07_metric', type=bool, default=True,
51 |                         help='use PASCAL VOC 07 metric')
52 |     parser.add_argument('--deploy', dest='deploy_net', help='Load network from model',
53 |                         action='store_true', default=False)
54 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
55 |                         default=20, type=int)
56 |     args = parser.parse_args()
57 |     return args
58 | 
if __name__ == '__main__':
    args = parse_args()

    # choose ctx: a single cpu context or one gpu context per listed id
    ctx = mx.cpu() if args.cpu else [mx.gpu(int(i)) for i in args.gpu_id.split(',')]

    # parse # classes and class_names if applicable
    num_class = args.num_class
    class_names = None
    if args.class_names:
        if os.path.isfile(args.class_names):
            # a file name was given: one class name per line
            with open(args.class_names, 'r') as f:
                class_names = [line.strip() for line in f]
        else:
            class_names = [c.strip() for c in args.class_names.split(',')]
        assert len(class_names) == num_class
        for name in class_names:
            assert len(name) > 0

    # --deploy evaluates the symbol stored with the checkpoint
    network = None if args.deploy_net else args.network
    # a prefix ending in '_' is completed with the network name
    prefix = args.prefix + args.network if args.prefix.endswith('_') else args.prefix

    evaluate_net(network, args.rec_path, num_class,
                 (args.mean_r, args.mean_g, args.mean_b), args.data_shape,
                 prefix, args.epoch, ctx, batch_size=args.batch_size,
                 path_imglist=args.list_path, nms_thresh=args.nms_thresh,
                 force_nms=args.force_nms, ovp_thresh=args.overlap_thresh,
                 use_difficult=args.use_difficult, class_names=class_names,
                 voc07_metric=args.use_voc07_metric, frequent=args.frequent)
93 | 


--------------------------------------------------------------------------------
/tools/prepare_dataset.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import sys, os
  3 | import argparse
  4 | import subprocess
  5 | import mxnet
  6 | curr_path = os.path.abspath(os.path.dirname(__file__))
  7 | sys.path.append(os.path.join(curr_path, '..'))
  8 | from dataset.pascal_voc import PascalVoc
  9 | from dataset.mscoco import Coco
 10 | from dataset.concat_db import ConcatDB
 11 | 
 12 | def load_pascal(image_set, year, devkit_path, shuffle=False, class_names=None, true_negative=None):
 13 |     """
 14 |     wrapper function for loading pascal voc dataset
 15 | 
 16 |     Parameters:
 17 |     ----------
 18 |     image_set : str
 19 |         train, trainval...
 20 |     year : str
 21 |         2007, 2012 or combinations splitted by comma
 22 |     devkit_path : str
 23 |         root directory of dataset
 24 |     shuffle : bool
 25 |         whether to shuffle initial list
 26 | 
 27 |     Returns:
 28 |     ----------
 29 |     Imdb
 30 |     """
 31 |     image_set = [y.strip() for y in image_set.split(',')]
 32 |     assert image_set, "No image_set specified"
 33 |     year = [y.strip() for y in year.split(',')]
 34 |     assert year, "No year specified"
 35 | 
 36 |     # make sure (# sets == # years)
 37 |     if len(image_set) > 1 and len(year) == 1:
 38 |         year = year * len(image_set)
 39 |     if len(image_set) == 1 and len(year) > 1:
 40 |         image_set = image_set * len(year)
 41 |     assert len(image_set) == len(year), "Number of sets and year mismatch"
 42 | 
 43 |     imdbs = []
 44 |     for s, y in zip(image_set, year):
 45 |         imdbs.append(PascalVoc(s, y, devkit_path, shuffle, is_train=True, class_names=class_names, true_negative_images=true_negative))
 46 |     if len(imdbs) > 1:
 47 |         return ConcatDB(imdbs, shuffle)
 48 |     else:
 49 |         return imdbs[0]
 50 | 
 51 | def load_coco(image_set, dirname, shuffle=False):
 52 |     """
 53 |     wrapper function for loading ms coco dataset
 54 | 
 55 |     Parameters:
 56 |     ----------
 57 |     image_set : str
 58 |         train2014, val2014, valminusminival2014, minival2014
 59 |     dirname: str
 60 |         root dir for coco
 61 |     shuffle: boolean
 62 |         initial shuffle
 63 |     """
 64 |     anno_files = ['instances_' + y.strip() + '.json' for y in image_set.split(',')]
 65 |     assert anno_files, "No image set specified"
 66 |     imdbs = []
 67 |     for af in anno_files:
 68 |         af_path = os.path.join(dirname, 'annotations', af)
 69 |         imdbs.append(Coco(af_path, dirname, shuffle=shuffle))
 70 |     if len(imdbs) > 1:
 71 |         return ConcatDB(imdbs, shuffle)
 72 |     else:
 73 |         return imdbs[0]
 74 | 
 75 | def parse_args():
 76 |     parser = argparse.ArgumentParser(description='Prepare lists for dataset')
 77 |     parser.add_argument('--dataset', dest='dataset', help='dataset to use',
 78 |                         default='pascal', type=str)
 79 |     parser.add_argument('--year', dest='year', help='which year to use',
 80 |                         default='2007,2012', type=str)
 81 |     parser.add_argument('--set', dest='set', help='train, val, trainval, test',
 82 |                         default='trainval', type=str)
 83 |     parser.add_argument('--target', dest='target', help='output list file',
 84 |                         default=None,
 85 |                         type=str)
 86 |     parser.add_argument('--class-names', dest='class_names', type=str,
 87 |                         default=None, help='string of comma separated names, or text filename')
 88 |     parser.add_argument('--root', dest='root_path', help='dataset root path',
 89 |                         default=os.path.join(curr_path, '..', 'data', 'VOCdevkit'),
 90 |                         type=str)
 91 |     parser.add_argument('--shuffle', dest='shuffle', help='shuffle list',
 92 |                         type=bool, default=True)
 93 |     parser.add_argument('--true-negative', dest='true_negative', help='use images with no GT as true_negative',
 94 |                         type=bool, default=False)
 95 |     args = parser.parse_args()
 96 |     return args
 97 | 
 98 | if __name__ == '__main__':
 99 |     args = parse_args()
100 |     if args.class_names is not None:
101 |         assert args.target is not None, 'for a subset of classes, specify a target path. Its for your own safety'
102 |     if args.dataset == 'pascal':
103 |         db = load_pascal(args.set, args.year, args.root_path, args.shuffle, args.class_names, args.true_negative)
104 |         print("saving list to disk...")
105 |         db.save_imglist(args.target, root=args.root_path)
106 |     elif args.dataset == 'coco':
107 |         db = load_coco(args.set, args.root_path, args.shuffle)
108 |         print("saving list to disk...")
109 |         db.save_imglist(args.target, root=args.root_path)
110 |     else:
111 |         raise NotImplementedError("No implementation for dataset: " + args.dataset)
112 | 
113 |     print("List file {} generated...".format(args.target))
114 | 
115 |     im2rec_path = os.path.join(mxnet.__path__[0], 'tools/im2rec.py')
116 |     # final validation - sometimes __path__ (or __file__) gives 'mxnet/python/mxnet' instead of 'mxnet'
117 |     if not os.path.exists(im2rec_path):
118 |         im2rec_path = os.path.join(os.path.dirname(os.path.dirname(mxnet.__path__[0])), 'tools/im2rec.py')
119 |     subprocess.check_call(["python", im2rec_path,
120 |         os.path.abspath(args.target), os.path.abspath(args.root_path),
121 |         "--shuffle", str(int(args.shuffle)), "--pack-label", "1"])
122 | 
123 |     print("Record file {} generated...".format(args.target.split('.')[0] + '.rec'))
124 | 


--------------------------------------------------------------------------------
/dataset/yolo_format.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import os
  3 | import numpy as np
  4 | from .imdb import Imdb
  5 | 
  6 | 
  7 | class YoloFormat(Imdb):
  8 |     """
  9 |     Base class for loading datasets as used in YOLO
 10 | 
 11 |     Parameters:
 12 |     ----------
 13 |     name : str
 14 |         name for this dataset
 15 |     classes : list or tuple of str
 16 |         class names in this dataset
 17 |     list_file : str
 18 |         filename of the image list file
 19 |     image_dir : str
 20 |         image directory
 21 |     label_dir : str
 22 |         label directory
 23 |     extension : str
 24 |         by default .jpg
 25 |     label_extension : str
 26 |         by default .txt
 27 |     shuffle : bool
 28 |         whether to shuffle the initial order when loading this dataset,
 29 |         default is True
 30 |     """
 31 |     def __init__(self, name, classes, list_file, image_dir, label_dir, \
 32 |                  extension='.jpg', label_extension='.txt', shuffle=True):
 33 |         if isinstance(classes, list) or isinstance(classes, tuple):
 34 |             num_classes = len(classes)
 35 |         elif isinstance(classes, str):
 36 |             with open(classes, 'r') as f:
 37 |                 classes = [l.strip() for l in f.readlines()]
 38 |                 num_classes = len(classes)
 39 |         else:
 40 |             raise ValueError("classes should be list/tuple or text file")
 41 |         assert num_classes > 0, "number of classes must > 0"
 42 |         super(YoloFormat, self).__init__(name + '_' + str(num_classes))
 43 |         self.classes = classes
 44 |         self.num_classes = num_classes
 45 |         self.list_file = list_file
 46 |         self.image_dir = image_dir
 47 |         self.label_dir = label_dir
 48 |         self.extension = extension
 49 |         self.label_extension = label_extension
 50 | 
 51 |         self.image_set_index = self._load_image_set_index(shuffle)
 52 |         self.num_images = len(self.image_set_index)
 53 |         self.labels = self._load_image_labels()
 54 | 
 55 | 
 56 |     def _load_image_set_index(self, shuffle):
 57 |         """
 58 |         find out which indexes correspond to given image set (train or val)
 59 | 
 60 |         Parameters:
 61 |         ----------
 62 |         shuffle : boolean
 63 |             whether to shuffle the image list
 64 |         Returns:
 65 |         ----------
 66 |         entire list of images specified in the setting
 67 |         """
 68 |         assert os.path.exists(self.list_file), 'Path does not exists: {}'.format(self.list_file)
 69 |         with open(self.list_file, 'r') as f:
 70 |             image_set_index = [x.strip() for x in f.readlines()]
 71 |         if shuffle:
 72 |             np.random.shuffle(image_set_index)
 73 |         return image_set_index
 74 | 
 75 |     def image_path_from_index(self, index):
 76 |         """
 77 |         given image index, find out full path
 78 | 
 79 |         Parameters:
 80 |         ----------
 81 |         index: int
 82 |             index of a specific image
 83 |         Returns:
 84 |         ----------
 85 |         full path of this image
 86 |         """
 87 |         assert self.image_set_index is not None, "Dataset not initialized"
 88 |         name = self.image_set_index[index]
 89 |         image_file = os.path.join(self.image_dir, name) + self.extension
 90 |         assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
 91 |         return image_file
 92 | 
 93 |     def label_from_index(self, index):
 94 |         """
 95 |         given image index, return preprocessed ground-truth
 96 | 
 97 |         Parameters:
 98 |         ----------
 99 |         index: int
100 |             index of a specific image
101 |         Returns:
102 |         ----------
103 |         ground-truths of this image
104 |         """
105 |         assert self.labels is not None, "Labels not processed"
106 |         return self.labels[index]
107 | 
108 |     def _label_path_from_index(self, index):
109 |         """
110 |         given image index, find out annotation path
111 | 
112 |         Parameters:
113 |         ----------
114 |         index: int
115 |             index of a specific image
116 | 
117 |         Returns:
118 |         ----------
119 |         full path of annotation file
120 |         """
121 |         label_file = os.path.join(self.label_dir, index + self.label_extension)
122 |         assert os.path.exists(label_file), 'Path does not exist: {}'.format(label_file)
123 |         return label_file
124 | 
125 |     def _load_image_labels(self):
126 |         """
127 |         preprocess all ground-truths
128 | 
129 |         Returns:
130 |         ----------
131 |         labels packed in [num_images x max_num_objects x 5] tensor
132 |         """
133 |         temp = []
134 | 
135 |         # load ground-truths
136 |         for idx in self.image_set_index:
137 |             label_file = self._label_path_from_index(idx)
138 |             with open(label_file, 'r') as f:
139 |                 label = []
140 |                 for line in f.readlines():
141 |                     temp_label = line.strip().split()
142 |                     assert len(temp_label) == 5, "Invalid label file" + label_file
143 |                     cls_id = int(temp_label[0])
144 |                     x = float(temp_label[1])
145 |                     y = float(temp_label[2])
146 |                     half_width = float(temp_label[3]) / 2
147 |                     half_height = float(temp_label[4]) / 2
148 |                     xmin = x - half_width
149 |                     ymin = y - half_height
150 |                     xmax = x + half_width
151 |                     ymax = y + half_height
152 |                     label.append([cls_id, xmin, ymin, xmax, ymax])
153 |                 temp.append(np.array(label))
154 |         return temp
155 | 


--------------------------------------------------------------------------------
/tools/caffe_converter/convert_model.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function, absolute_import
  2 | import sys
  3 | import os.path as osp
  4 | sys.path.insert(0, osp.join(osp.dirname(__file__), '..'))
  5 | from tools import find_mxnet
  6 | import mxnet as mx
  7 | import numpy as np
  8 | import argparse
  9 | import re
 10 | from .convert_symbol import proto2symbol
 11 | 
# caffe_flag records whether pycaffe could be imported; main() reads the
# model through caffe.Net when it is True.
caffe_flag = True
try:
    import caffe
except ImportError:
    # pycaffe is not available: use the protobuf parser from the local
    # caffe_parse package instead (main() calls parse.parse_caffemodel).
    from .caffe_parse import parse_from_protobuf as parse

    caffe_flag = False
 19 | 
 20 | 
 21 | def get_caffe_iter(layer_names, layers):
 22 |     for layer_idx, layer in enumerate(layers):
 23 |         layer_name = re.sub('[-/]', '_', layer_names[layer_idx])
 24 |         layer_type = layer.type
 25 |         layer_blobs = layer.blobs
 26 |         yield (layer_name, layer_type, layer_blobs)
 27 | 
 28 | 
 29 | def get_iter(layers):
 30 |     for layer in layers:
 31 |         layer_name = re.sub('[-/]', '_', layer.name)
 32 |         layer_type = layer.type
 33 |         layer_blobs = layer.blobs
 34 |         yield (layer_name, layer_type, layer_blobs)
 35 | 
 36 | 
 37 | def main():
 38 |     parser = argparse.ArgumentParser(description='Caffe prototxt to mxnet model parameter converter.\
 39 |                     Note that only basic functions are implemented. You are welcomed to contribute to this file.')
 40 |     parser.add_argument('caffe_prototxt', help='The prototxt file in Caffe format')
 41 |     parser.add_argument('caffe_model', help='The binary model parameter file in Caffe format')
 42 |     parser.add_argument('save_model_name', help='The name of the output model prefix')
 43 |     args = parser.parse_args()
 44 | 
 45 |     prob, input_dim = proto2symbol(args.caffe_prototxt)
 46 | 
 47 |     layers = ''
 48 |     layer_names = ''
 49 | 
 50 |     if caffe_flag:
 51 |         caffe.set_mode_cpu()
 52 |         net_caffe = caffe.Net(args.caffe_prototxt, args.caffe_model, caffe.TEST)
 53 |         layer_names = net_caffe._layer_names
 54 |         layers = net_caffe.layers
 55 |     else:
 56 |         layers = parse.parse_caffemodel(args.caffe_model)
 57 | 
 58 |     arg_shapes, output_shapes, aux_shapes = prob.infer_shape(data=tuple(input_dim))
 59 |     arg_names = prob.list_arguments()
 60 |     arg_shape_dic = dict(zip(arg_names, arg_shapes))
 61 |     arg_params = {}
 62 | 
 63 |     iter = ''
 64 |     if caffe_flag:
 65 |         iter = get_caffe_iter(layer_names, layers)
 66 |     else:
 67 |         iter = get_iter(layers)
 68 |     first_conv = True
 69 | 
 70 |     for layer_name, layer_type, layer_blobs in iter:
 71 |         if layer_type == 'Convolution' or layer_type == 'InnerProduct' or layer_type == 4 or layer_type == 14 \
 72 |                 or layer_type == 'PReLU' or layer_type == 'Normalize':
 73 |             if layer_type == 'PReLU':
 74 |                 assert (len(layer_blobs) == 1)
 75 |                 wmat = layer_blobs[0].data
 76 |                 weight_name = layer_name + '_gamma'
 77 |                 arg_params[weight_name] = mx.nd.zeros(wmat.shape)
 78 |                 arg_params[weight_name][:] = wmat
 79 |                 continue
 80 |             if layer_type == 'Normalize':
 81 |                 assert (len(layer_blobs) == 1)
 82 |                 weight_name = layer_name + '_scale'
 83 |                 wmat = layer_blobs[0].data
 84 |                 arg_params[weight_name] = mx.nd.zeros((1, len(wmat), 1, 1))
 85 |                 arg_params[weight_name][:] = np.array(list(wmat)).reshape((1, len(wmat), 1, 1))
 86 |                 continue
 87 |             assert (len(layer_blobs) == 2)
 88 |             wmat_dim = []
 89 |             if getattr(layer_blobs[0].shape, 'dim', None) is not None:
 90 |                 if len(layer_blobs[0].shape.dim) > 0:
 91 |                     wmat_dim = layer_blobs[0].shape.dim
 92 |                 else:
 93 |                     wmat_dim = [layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height,
 94 |                                 layer_blobs[0].width]
 95 |             else:
 96 |                 wmat_dim = list(layer_blobs[0].shape)
 97 |             wmat = np.array(layer_blobs[0].data).reshape(wmat_dim)
 98 |             bias = np.array(layer_blobs[1].data)
 99 |             channels = wmat_dim[1]
100 |             if channels == 3 or channels == 4:  # RGB or RGBA
101 |                 if first_conv:
102 |                     print('Swapping BGR of caffe into RGB in mxnet')
103 |                     wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :]
104 | 
105 |             assert (wmat.flags['C_CONTIGUOUS'] is True)
106 |             assert (bias.flags['C_CONTIGUOUS'] is True)
107 |             print('converting layer {0}, wmat shape = {1}, bias shape = {2}'.format(layer_name, wmat.shape, bias.shape))
108 |             wmat = wmat.reshape((wmat.shape[0], -1))
109 |             bias = bias.reshape((bias.shape[0], 1))
110 |             weight_name = layer_name + "_weight"
111 |             bias_name = layer_name + "_bias"
112 | 
113 |             if weight_name not in arg_shape_dic:
114 |                 print(weight_name + ' not found in arg_shape_dic.')
115 |                 continue
116 |             wmat = wmat.reshape(arg_shape_dic[weight_name])
117 |             arg_params[weight_name] = mx.nd.zeros(wmat.shape)
118 |             arg_params[weight_name][:] = wmat
119 | 
120 |             bias = bias.reshape(arg_shape_dic[bias_name])
121 |             arg_params[bias_name] = mx.nd.zeros(bias.shape)
122 |             arg_params[bias_name][:] = bias
123 | 
124 |             if first_conv and (layer_type == 'Convolution' or layer_type == 4):
125 |                 first_conv = False
126 | 
127 |     model = mx.mod.Module(symbol=prob, label_names=None)
128 |     model.bind(data_shapes=[('data', tuple(input_dim))])
129 |     model.init_params(arg_params=arg_params, aux_params={})
130 | 
131 |     model.save_checkpoint(args.save_model_name, 1)
132 | 
133 | 
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
136 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import tools.find_mxnet
  3 | import mxnet as mx
  4 | import os
  5 | import sys
  6 | from detect.detector import Detector
  7 | from symbol.symbol_factory import get_symbol
  8 | 
  9 | def get_detector(net, prefix, epoch, data_shape, mean_pixels, ctx, num_class,
 10 |                  nms_thresh=0.5, force_nms=True, nms_topk=400):
 11 |     """
 12 |     wrapper for initialize a detector
 13 | 
 14 |     Parameters:
 15 |     ----------
 16 |     net : str
 17 |         test network name
 18 |     prefix : str
 19 |         load model prefix
 20 |     epoch : int
 21 |         load model epoch
 22 |     data_shape : int
 23 |         resize image shape
 24 |     mean_pixels : tuple (float, float, float)
 25 |         mean pixel values (R, G, B)
 26 |     ctx : mx.ctx
 27 |         running context, mx.cpu() or mx.gpu(?)
 28 |     num_class : int
 29 |         number of classes
 30 |     nms_thresh : float
 31 |         non-maximum suppression threshold
 32 |     force_nms : bool
 33 |         force suppress different categories
 34 |     """
 35 |     if net is not None:
 36 |         net = get_symbol(net, data_shape, num_classes=num_class, nms_thresh=nms_thresh,
 37 |             force_nms=force_nms, nms_topk=nms_topk)
 38 |     detector = Detector(net, prefix, epoch, data_shape, mean_pixels, ctx=ctx)
 39 |     return detector
 40 | 
 41 | def parse_args():
 42 |     parser = argparse.ArgumentParser(description='Single-shot detection network demo')
 43 |     parser.add_argument('--network', dest='network', type=str, default='resnet50',
 44 |                         help='which network to use')
 45 |     parser.add_argument('--images', dest='images', type=str, default='./data/demo/dog.jpg',
 46 |                         help='run demo with images, use comma to seperate multiple images')
 47 |     parser.add_argument('--dir', dest='dir', nargs='?',
 48 |                         help='demo image directory, optional', type=str)
 49 |     parser.add_argument('--ext', dest='extension', help='image extension, optional',
 50 |                         type=str, nargs='?')
 51 |     parser.add_argument('--epoch', dest='epoch', help='epoch of trained model',
 52 |                         default=0, type=int)
 53 |     parser.add_argument('--prefix', dest='prefix', help='trained model prefix',
 54 |                         default=os.path.join(os.getcwd(), 'model', 'ssd_'),
 55 |                         type=str)
 56 |     parser.add_argument('--cpu', dest='cpu', help='(override GPU) use CPU to detect',
 57 |                         action='store_true', default=False)
 58 |     parser.add_argument('--gpu', dest='gpu_id', type=int, default=0,
 59 |                         help='GPU device id to detect with')
 60 |     parser.add_argument('--data-shape', dest='data_shape', type=int, default=512,
 61 |                         help='set image shape')
 62 |     parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
 63 |                         help='red mean value')
 64 |     parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
 65 |                         help='green mean value')
 66 |     parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
 67 |                         help='blue mean value')
 68 |     parser.add_argument('--thresh', dest='thresh', type=float, default=0.5,
 69 |                         help='object visualize score threshold, default 0.6')
 70 |     parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.5,
 71 |                         help='non-maximum suppression threshold, default 0.5')
 72 |     parser.add_argument('--force', dest='force_nms', type=bool, default=True,
 73 |                         help='force non-maximum suppression on different class')
 74 |     parser.add_argument('--timer', dest='show_timer', type=bool, default=True,
 75 |                         help='show detection time')
 76 |     parser.add_argument('--deploy', dest='deploy_net', action='store_true', default=False,
 77 |                         help='Load network from json file, rather than from symbol')
 78 |     parser.add_argument('--class-names', dest='class_names', type=str,
 79 |                         default='aeroplane, bicycle, bird, boat, bottle, bus, \
 80 |                         car, cat, chair, cow, diningtable, dog, horse, motorbike, \
 81 |                         person, pottedplant, sheep, sofa, train, tvmonitor',
 82 |                         help='string of comma separated names, or text filename')
 83 |     args = parser.parse_args()
 84 |     return args
 85 | 
 86 | def parse_class_names(class_names):
 87 |     """ parse # classes and class_names if applicable """
 88 |     if len(class_names) > 0:
 89 |         if os.path.isfile(class_names):
 90 |             # try to open it to read class names
 91 |             with open(class_names, 'r') as f:
 92 |                 class_names = [l.strip() for l in f.readlines()]
 93 |         else:
 94 |             class_names = [c.strip() for c in class_names.split(',')]
 95 |         for name in class_names:
 96 |             assert len(name) > 0
 97 |     else:
 98 |         raise RuntimeError("No valid class_name provided...")
 99 |     return class_names
100 | 
if __name__ == '__main__':
    # Script entry point: build a detector from the command line options and
    # visualize detections on the requested images.
    args = parse_args()
    if args.cpu:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu_id)

    # parse image list; empty entries (e.g. from a trailing comma) are dropped
    # so that the assertion below can actually detect an empty list
    image_list = [i.strip() for i in args.images.split(',') if i.strip()]
    assert len(image_list) > 0, "No valid image specified to detect"

    # with --deploy the symbol is taken from the checkpoint, not rebuilt
    network = None if args.deploy_net else args.network
    class_names = parse_class_names(args.class_names)
    # a prefix ending in '_' is completed with '<network>_<data_shape>'
    if args.prefix.endswith('_'):
        prefix = args.prefix + args.network + '_' + str(args.data_shape)
    else:
        prefix = args.prefix
    detector = get_detector(network, prefix, args.epoch,
                            args.data_shape,
                            (args.mean_r, args.mean_g, args.mean_b),
                            ctx, len(class_names), args.nms_thresh, args.force_nms)
    # run detection
    detector.detect_and_visualize(image_list, args.dir, args.extension,
                                  class_names, args.thresh, args.show_timer)
125 | 


--------------------------------------------------------------------------------
/evaluate/eval_voc.py:
--------------------------------------------------------------------------------
  1 | """
  2 | given a pascal voc imdb, compute mAP
  3 | """
  4 | from __future__ import print_function
  5 | import numpy as np
  6 | import os
  7 | try:
  8 |     import cPickle as pickle
  9 | except ImportError:
 10 |     import pickle
 11 | 
 12 | 
 13 | def parse_voc_rec(filename):
 14 |     """
 15 |     parse pascal voc record into a dictionary
 16 |     :param filename: xml file path
 17 |     :return: list of dict
 18 |     """
 19 |     import xml.etree.ElementTree as ET
 20 |     tree = ET.parse(filename)
 21 |     objects = []
 22 |     for obj in tree.findall('object'):
 23 |         obj_dict = dict()
 24 |         obj_dict['name'] = obj.find('name').text
 25 |         obj_dict['difficult'] = int(obj.find('difficult').text)
 26 |         bbox = obj.find('bndbox')
 27 |         obj_dict['bbox'] = [int(bbox.find('xmin').text),
 28 |                             int(bbox.find('ymin').text),
 29 |                             int(bbox.find('xmax').text),
 30 |                             int(bbox.find('ymax').text)]
 31 |         objects.append(obj_dict)
 32 |     return objects
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |     """
 37 |     average precision calculations
 38 |     [precision integrated to recall]
 39 |     :param rec: recall
 40 |     :param prec: precision
 41 |     :param use_07_metric: 2007 metric is 11-recall-point based AP
 42 |     :return: average precision
 43 |     """
 44 |     if use_07_metric:
 45 |         ap = 0.
 46 |         for t in np.arange(0., 1.1, 0.1):
 47 |             if np.sum(rec >= t) == 0:
 48 |                 p = 0
 49 |             else:
 50 |                 p = np.max(prec[rec >= t])
 51 |             ap += p / 11.
 52 |     else:
 53 |         # append sentinel values at both ends
 54 |         mrec = np.concatenate(([0.], rec, [1.]))
 55 |         mpre = np.concatenate(([0.], prec, [0.]))
 56 | 
 57 |         # compute precision integration ladder
 58 |         for i in range(mpre.size - 1, 0, -1):
 59 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 60 | 
 61 |         # look for recall value changes
 62 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |         # sum (\delta recall) * prec
 65 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |     return ap
 67 | 
 68 | 
 69 | def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.5, use_07_metric=False):
 70 |     """
 71 |     pascal voc evaluation
 72 |     :param detpath: detection results detpath.format(classname)
 73 |     :param annopath: annotations annopath.format(classname)
 74 |     :param imageset_file: text file containing list of images
 75 |     :param classname: category name
 76 |     :param cache_dir: caching annotations
 77 |     :param ovthresh: overlap threshold
 78 |     :param use_07_metric: whether to use voc07's 11 point ap computation
 79 |     :return: rec, prec, ap
 80 |     """
 81 |     if not os.path.isdir(cache_dir):
 82 |         os.mkdir(cache_dir)
 83 |     cache_file = os.path.join(cache_dir, 'annotations.pkl')
 84 |     with open(imageset_file, 'r') as f:
 85 |         lines = f.readlines()
 86 |     image_filenames = [x.strip() for x in lines]
 87 | 
 88 |     # load annotations from cache
 89 |     if not os.path.isfile(cache_file):
 90 |         recs = {}
 91 |         for ind, image_filename in enumerate(image_filenames):
 92 |             recs[image_filename] = parse_voc_rec(annopath.format(image_filename))
 93 |             if ind % 100 == 0:
 94 |                 print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames)))
 95 |         print('saving annotations cache to {:s}'.format(cache_file))
 96 |         with open(cache_file, 'wb') as f:
 97 |             pickle.dump(recs, f)
 98 |     else:
 99 |         with open(cache_file, 'rb') as f:
100 |             recs = pickle.load(f)
101 | 
102 |     # extract objects in :param classname:
103 |     class_recs = {}
104 |     npos = 0
105 |     for image_filename in image_filenames:
106 |         objects = [obj for obj in recs[image_filename] if obj['name'] == classname]
107 |         bbox = np.array([x['bbox'] for x in objects])
108 |         difficult = np.array([x['difficult'] for x in objects]).astype(np.bool)
109 |         det = [False] * len(objects)  # stand for detected
110 |         npos = npos + sum(~difficult)
111 |         class_recs[image_filename] = {'bbox': bbox,
112 |                                       'difficult': difficult,
113 |                                       'det': det}
114 | 
115 |     # read detections
116 |     detfile = detpath.format(classname)
117 |     with open(detfile, 'r') as f:
118 |         lines = f.readlines()
119 | 
120 |     splitlines = [x.strip().split(' ') for x in lines]
121 |     image_ids = [x[0] for x in splitlines]
122 |     confidence = np.array([float(x[1]) for x in splitlines])
123 |     bbox = np.array([[float(z) for z in x[2:]] for x in splitlines])
124 | 
125 |     # sort by confidence
126 |     sorted_inds = np.argsort(-confidence)
127 |     sorted_scores = np.sort(-confidence)
128 |     bbox = bbox[sorted_inds, :]
129 |     image_ids = [image_ids[x] for x in sorted_inds]
130 | 
131 |     # go down detections and mark true positives and false positives
132 |     nd = len(image_ids)
133 |     tp = np.zeros(nd)
134 |     fp = np.zeros(nd)
135 |     for d in range(nd):
136 |         r = class_recs[image_ids[d]]
137 |         bb = bbox[d, :].astype(float)
138 |         ovmax = -np.inf
139 |         bbgt = r['bbox'].astype(float)
140 | 
141 |         if bbgt.size > 0:
142 |             # compute overlaps
143 |             # intersection
144 |             ixmin = np.maximum(bbgt[:, 0], bb[0])
145 |             iymin = np.maximum(bbgt[:, 1], bb[1])
146 |             ixmax = np.minimum(bbgt[:, 2], bb[2])
147 |             iymax = np.minimum(bbgt[:, 3], bb[3])
148 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
149 |             ih = np.maximum(iymax - iymin + 1., 0.)
150 |             inters = iw * ih
151 | 
152 |             # union
153 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
154 |                    (bbgt[:, 2] - bbgt[:, 0] + 1.) *
155 |                    (bbgt[:, 3] - bbgt[:, 1] + 1.) - inters)
156 | 
157 |             overlaps = inters / uni
158 |             ovmax = np.max(overlaps)
159 |             jmax = np.argmax(overlaps)
160 | 
161 |         if ovmax > ovthresh:
162 |             if not r['difficult'][jmax]:
163 |                 if not r['det'][jmax]:
164 |                     tp[d] = 1.
165 |                     r['det'][jmax] = 1
166 |                 else:
167 |                     fp[d] = 1.
168 |         else:
169 |             fp[d] = 1.
170 | 
171 |     # compute precision recall
172 |     fp = np.cumsum(fp)
173 |     tp = np.cumsum(tp)
174 |     rec = tp / float(npos)
175 |     # avoid division by zero in case first detection matches a difficult ground ruth
176 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
177 |     ap = voc_ap(rec, prec, use_07_metric)
178 | 
179 |     return rec, prec, ap
180 | 


--------------------------------------------------------------------------------
/detect/detector.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import mxnet as mx
  3 | import numpy as np
  4 | from timeit import default_timer as timer
  5 | from dataset.testdb import TestDB
  6 | from dataset.iterator import DetIter
  7 | 
  8 | class Detector(object):
  9 |     """
 10 |     SSD detector which hold a detection network and wraps detection API
 11 | 
 12 |     Parameters:
 13 |     ----------
 14 |     symbol : mx.Symbol
 15 |         detection network Symbol
 16 |     model_prefix : str
 17 |         name prefix of trained model
 18 |     epoch : int
 19 |         load epoch of trained model
 20 |     data_shape : int
 21 |         input data resize shape
 22 |     mean_pixels : tuple of float
 23 |         (mean_r, mean_g, mean_b)
 24 |     batch_size : int
 25 |         run detection with batch size
 26 |     ctx : mx.ctx
 27 |         device to use, if None, use mx.cpu() as default context
 28 |     """
 29 |     def __init__(self, symbol, model_prefix, epoch, data_shape, mean_pixels, \
 30 |                  batch_size=1, ctx=None):
 31 |         self.ctx = ctx
 32 |         if self.ctx is None:
 33 |             self.ctx = mx.cpu()
 34 |         load_symbol, args, auxs = mx.model.load_checkpoint(model_prefix, epoch)
 35 |         if symbol is None:
 36 |             symbol = load_symbol
 37 |         self.mod = mx.mod.Module(symbol, label_names=None, context=ctx)
 38 |         self.data_shape = data_shape
 39 |         self.mod.bind(data_shapes=[('data', (batch_size, 3, data_shape, data_shape))])
 40 |         self.mod.set_params(args, auxs)
 41 |         self.data_shape = data_shape
 42 |         self.mean_pixels = mean_pixels
 43 | 
 44 |     def detect(self, det_iter, show_timer=False):
 45 |         """
 46 |         detect all images in iterator
 47 | 
 48 |         Parameters:
 49 |         ----------
 50 |         det_iter : DetIter
 51 |             iterator for all testing images
 52 |         show_timer : Boolean
 53 |             whether to print out detection exec time
 54 | 
 55 |         Returns:
 56 |         ----------
 57 |         list of detection results
 58 |         """
 59 |         num_images = det_iter._size
 60 |         result = []
 61 |         detections = []
 62 |         if not isinstance(det_iter, mx.io.PrefetchingIter):
 63 |             det_iter = mx.io.PrefetchingIter(det_iter)
 64 |         start = timer()
 65 |         for pred, _, _ in self.mod.iter_predict(det_iter):
 66 |             detections.append(pred[0].asnumpy())
 67 |         time_elapsed = timer() - start
 68 |         if show_timer:
 69 |             print("Detection time for {} images: {:.4f} sec".format(
 70 |                 num_images, time_elapsed))
 71 |         for output in detections:
 72 |             for i in range(output.shape[0]):
 73 |                 det = output[i, :, :]
 74 |                 res = det[np.where(det[:, 0] >= 0)[0]]
 75 |                 result.append(res)
 76 |         return result
 77 | 
 78 |     def im_detect(self, im_list, root_dir=None, extension=None, show_timer=False):
 79 |         """
 80 |         wrapper for detecting multiple images
 81 | 
 82 |         Parameters:
 83 |         ----------
 84 |         im_list : list of str
 85 |             image path or list of image paths
 86 |         root_dir : str
 87 |             directory of input images, optional if image path already
 88 |             has full directory information
 89 |         extension : str
 90 |             image extension, eg. ".jpg", optional
 91 | 
 92 |         Returns:
 93 |         ----------
 94 |         list of detection results in format [det0, det1...], det is in
 95 |         format np.array([id, score, xmin, ymin, xmax, ymax]...)
 96 |         """
 97 |         test_db = TestDB(im_list, root_dir=root_dir, extension=extension)
 98 |         test_iter = DetIter(test_db, 1, self.data_shape, self.mean_pixels,
 99 |                             is_train=False)
100 |         return self.detect(test_iter, show_timer)
101 | 
102 |     def visualize_detection(self, img, dets, classes=[], thresh=0.6):
103 |         """
104 |         visualize detections in one image
105 | 
106 |         Parameters:
107 |         ----------
108 |         img : numpy.array
109 |             image, in bgr format
110 |         dets : numpy.array
111 |             ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...])
112 |             each row is one object
113 |         classes : tuple or list of str
114 |             class names
115 |         thresh : float
116 |             score threshold
117 |         """
118 |         import matplotlib.pyplot as plt
119 |         import random
120 |         plt.imshow(img)
121 |         height = img.shape[0]
122 |         width = img.shape[1]
123 |         colors = dict()
124 |         for i in range(dets.shape[0]):
125 |             cls_id = int(dets[i, 0])
126 |             if cls_id >= 0:
127 |                 score = dets[i, 1]
128 |                 if score > thresh:
129 |                     if cls_id not in colors:
130 |                         colors[cls_id] = (random.random(), random.random(), random.random())
131 |                     xmin = int(dets[i, 2] * width)
132 |                     ymin = int(dets[i, 3] * height)
133 |                     xmax = int(dets[i, 4] * width)
134 |                     ymax = int(dets[i, 5] * height)
135 |                     rect = plt.Rectangle((xmin, ymin), xmax - xmin,
136 |                                          ymax - ymin, fill=False,
137 |                                          edgecolor=colors[cls_id],
138 |                                          linewidth=3.5)
139 |                     plt.gca().add_patch(rect)
140 |                     class_name = str(cls_id)
141 |                     if classes and len(classes) > cls_id:
142 |                         class_name = classes[cls_id]
143 |                     plt.gca().text(xmin, ymin - 2,
144 |                                     '{:s} {:.3f}'.format(class_name, score),
145 |                                     bbox=dict(facecolor=colors[cls_id], alpha=0.5),
146 |                                     fontsize=12, color='white')
147 |         plt.show()
148 | 
149 |     def detect_and_visualize(self, im_list, root_dir=None, extension=None,
150 |                              classes=[], thresh=0.6, show_timer=False):
151 |         """
152 |         wrapper for im_detect and visualize_detection
153 | 
154 |         Parameters:
155 |         ----------
156 |         im_list : list of str or str
157 |             image path or list of image paths
158 |         root_dir : str or None
159 |             directory of input images, optional if image path already
160 |             has full directory information
161 |         extension : str or None
162 |             image extension, eg. ".jpg", optional
163 | 
164 |         Returns:
165 |         ----------
166 | 
167 |         """
168 |         import cv2
169 |         dets = self.im_detect(im_list, root_dir, extension, show_timer=show_timer)
170 |         if not isinstance(im_list, list):
171 |             im_list = [im_list]
172 |         assert len(dets) == len(im_list)
173 |         for k, det in enumerate(dets):
174 |             img = cv2.imread(im_list[k])
175 |             img[:, :, (0, 1, 2)] = img[:, :, (2, 1, 0)]
176 |             self.visualize_detection(img, det, classes, thresh)
177 | 


--------------------------------------------------------------------------------
/symbol/symbol_builder.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | from common import multi_layer_feature, multibox_layer
  3 | 
  4 | 
  5 | def import_module(module_name):
  6 |     """Helper function to import module"""
  7 |     import sys, os
  8 |     import importlib
  9 |     sys.path.append(os.path.dirname(__file__))
 10 |     return importlib.import_module(module_name)
 11 | 
 12 | def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads,
 13 |                      sizes, ratios, normalizations=-1, steps=[], min_filter=128,
 14 |                      nms_thresh=0.5, force_suppress=False, nms_topk=400, minimum_negative_samples=0, **kwargs):
 15 |     """Build network symbol for training SSD
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     network : str
 20 |         base network symbol name
 21 |     num_classes : int
 22 |         number of object classes not including background
 23 |     from_layers : list of str
 24 |         feature extraction layers, use '' for add extra layers
 25 |         For example:
 26 |         from_layers = ['relu4_3', 'fc7', '', '', '', '']
 27 |         which means extract feature from relu4_3 and fc7, adding 4 extra layers
 28 |         on top of fc7
 29 |     num_filters : list of int
 30 |         number of filters for extra layers, you can use -1 for extracted features,
 31 |         however, if normalization and scale is applied, the number of filter for
 32 |         that layer must be provided.
 33 |         For example:
 34 |         num_filters = [512, -1, 512, 256, 256, 256]
 35 |     strides : list of int
 36 |         strides for the 3x3 convolution appended, -1 can be used for extracted
 37 |         feature layers
 38 |     pads : list of int
 39 |         paddings for the 3x3 convolution, -1 can be used for extracted layers
 40 |     sizes : list or list of list
 41 |         [min_size, max_size] for all layers or [[], [], []...] for specific layers
 42 |     ratios : list or list of list
 43 |         [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
 44 |     normalizations : int or list of int
 45 |         use normalizations value for all layers or [...] for specific layers,
 46 |         -1 indicate no normalizations and scales
 47 |     steps : list
 48 |         specify steps for each MultiBoxPrior layer, leave empty, it will calculate
 49 |         according to layer dimensions
 50 |     min_filter : int
 51 |         minimum number of filters used in 1x1 convolution
 52 |     nms_thresh : float
 53 |         non-maximum suppression threshold
 54 |     force_suppress : boolean
 55 |         whether suppress different class objects
 56 |     nms_topk : int
 57 |         apply NMS to top K detections
 58 |     minimum_negative_samples : int
 59 |         always have some negative examples, no matter how many positive there are.
 60 |         this is useful when training on images with no ground-truth.
 61 |     Returns
 62 |     -------
 63 |     mx.Symbol
 64 | 
 65 |     """
 66 |     label = mx.sym.Variable('label')
 67 |     body = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
 68 |     layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
 69 |         min_filter=min_filter)
 70 | 
 71 |     loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
 72 |         num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
 73 |         num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
 74 | 
 75 |     tmp = mx.contrib.symbol.MultiBoxTarget(
 76 |         *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
 77 |         ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=minimum_negative_samples, \
 78 |         negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
 79 |         name="multibox_target")
 80 |     loc_target = tmp[0]
 81 |     loc_target_mask = tmp[1]
 82 |     cls_target = tmp[2]
 83 | 
 84 |     cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
 85 |         ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
 86 |         normalization='valid', name="cls_prob")
 87 |     loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
 88 |         data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
 89 |     loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
 90 |         normalization='valid', name="loc_loss")
 91 | 
 92 |     # monitoring training status
 93 |     cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
 94 |     det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
 95 |         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
 96 |         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
 97 |     det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
 98 | 
 99 |     # group output
100 |     out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
101 |     return out
102 | 
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
               strides, pads, normalizations=-1, steps=[], min_filter=128,
               nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
    """Assemble the SSD inference symbol on top of a base network.

    The base network is loaded by name, multi-scale feature layers are
    derived from it, and the multibox prediction heads are attached. Class
    scores go through a channel-wise softmax, and MultiBoxDetection turns
    the predictions and anchors into the final detection output.

    Parameters
    ----------
    network : str
        base network symbol name (module imported through import_module)
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' to add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        extracts features from relu4_3 and fc7 and adds 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, -1 can be used for extracted
        features; however, if normalization and scale is applied, the number
        of filters for that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for
        extracted feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    normalizations : int or list of int
        one value for all layers or [...] for specific layers,
        -1 indicates no normalization and scale
    steps : list
        steps for each MultiBoxPrior layer; leave empty to have them
        calculated from the layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether to suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    **kwargs
        forwarded to the base network's get_symbol

    Returns
    -------
    mx.Symbol
        the MultiBoxDetection output symbol
    """
    backbone = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
    feature_layers = multi_layer_feature(
        backbone, from_layers, num_filters, strides, pads, min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        feature_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters, clip=False,
        interm_layer=0, steps=steps)

    # class probabilities, normalized along the channel axis
    cls_prob = mx.symbol.SoftmaxActivation(
        data=cls_preds, mode='channel', name='cls_prob')

    detections = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection", nms_threshold=nms_thresh,
        force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    return detections
169 | 


--------------------------------------------------------------------------------
/symbol/resnet.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
  3 | Original author Wei Wu
  4 | 
  5 | Implemented the following paper:
  6 | 
  7 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
  8 | '''
  9 | import mxnet as mx
 10 | 
 11 | def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
 12 |     """Return ResNet Unit symbol for building ResNet
 13 |     Parameters
 14 |     ----------
 15 |     data : str
 16 |         Input data
 17 |     num_filter : int
 18 |         Number of output channels
 19 |     bnf : int
 20 |         Bottle neck channels factor with regard to num_filter
 21 |     stride : tupe
 22 |         Stride used in convolution
 23 |     dim_match : Boolen
 24 |         True means channel number between input and output is the same, otherwise means differ
 25 |     name : str
 26 |         Base name of the operators
 27 |     workspace : int
 28 |         Workspace used in convolution operator
 29 |     """
 30 |     if bottle_neck:
 31 |         # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
 32 |         bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
 33 |         act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 34 |         conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0),
 35 |                                    no_bias=True, workspace=workspace, name=name + '_conv1')
 36 |         bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
 37 |         act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
 38 |         conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=stride, pad=(1,1),
 39 |                                    no_bias=True, workspace=workspace, name=name + '_conv2')
 40 |         bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
 41 |         act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
 42 |         conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
 43 |                                    workspace=workspace, name=name + '_conv3')
 44 |         if dim_match:
 45 |             shortcut = data
 46 |         else:
 47 |             shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
 48 |                                             workspace=workspace, name=name+'_sc')
 49 |         if memonger:
 50 |             shortcut._set_attr(mirror_stage='True')
 51 |         return conv3 + shortcut
 52 |     else:
 53 |         bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
 54 |         act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 55 |         conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
 56 |                                       no_bias=True, workspace=workspace, name=name + '_conv1')
 57 |         bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
 58 |         act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
 59 |         conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
 60 |                                       no_bias=True, workspace=workspace, name=name + '_conv2')
 61 |         if dim_match:
 62 |             shortcut = data
 63 |         else:
 64 |             shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
 65 |                                             workspace=workspace, name=name+'_sc')
 66 |         if memonger:
 67 |             shortcut._set_attr(mirror_stage='True')
 68 |         return conv2 + shortcut
 69 | 
 70 | def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
 71 |     """Return ResNet symbol of
 72 |     Parameters
 73 |     ----------
 74 |     units : list
 75 |         Number of units in each stage
 76 |     num_stages : int
 77 |         Number of stage
 78 |     filter_list : list
 79 |         Channel size of each stage
 80 |     num_classes : int
 81 |         Ouput size of symbol
 82 |     dataset : str
 83 |         Dataset type, only cifar10 and imagenet supports
 84 |     workspace : int
 85 |         Workspace used in convolution operator
 86 |     """
 87 |     num_unit = len(units)
 88 |     assert(num_unit == num_stages)
 89 |     data = mx.sym.Variable(name='data')
 90 |     data = mx.sym.identity(data=data, name='id')
 91 |     data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
 92 |     (nchannel, height, width) = image_shape
 93 |     if height <= 32:            # such as cifar10
 94 |         body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
 95 |                                   no_bias=True, name="conv0", workspace=workspace)
 96 |     else:                       # often expected to be 224 such as imagenet
 97 |         body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
 98 |                                   no_bias=True, name="conv0", workspace=workspace)
 99 |         body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
100 |         body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
101 |         body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
102 | 
103 |     for i in range(num_stages):
104 |         body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
105 |                              name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
106 |                              memonger=memonger)
107 |         for j in range(units[i]-1):
108 |             body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
109 |                                  bottle_neck=bottle_neck, workspace=workspace, memonger=memonger)
110 |     bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
111 |     relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
112 |     # Although kernel is not used here when global_pool=True, we should put one
113 |     pool1 = mx.symbol.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
114 |     flat = mx.symbol.Flatten(data=pool1)
115 |     fc1 = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
116 |     return mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
117 | 
def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, **kwargs):
    """
    Pick the ResNet configuration for a depth / input size and build it.

    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
    Original author Wei Wu

    Parameters
    ----------
    num_classes : int
        Output size, forwarded to resnet
    num_layers : int
        Network depth. For inputs with height <= 28 it must satisfy
        (num_layers-2) % 9 == 0 with num_layers >= 164 (bottleneck) or
        (num_layers-2) % 6 == 0 with num_layers < 164; otherwise it must be
        one of 18, 34, 50, 101, 152, 200, 269
    image_shape : str
        Comma separated "channels,height,width"
    conv_workspace : int
        Workspace used in convolution operator
    **kwargs
        Accepted and ignored

    Raises
    ------
    ValueError
        If no configuration exists for num_layers
    """
    unsupported = "no experiments done on num_layers {}, you can do it yourself"

    image_shape = [int(dim) for dim in image_shape.split(',')]
    _, height, _ = image_shape

    if height <= 28:
        # small inputs: three stages with the same number of units each
        num_stages = 3
        depth = num_layers - 2
        if depth % 9 == 0 and num_layers >= 164:
            bottle_neck = True
            filter_list = [16, 64, 128, 256]
            per_stage = depth // 9
        elif depth % 6 == 0 and num_layers < 164:
            bottle_neck = False
            filter_list = [16, 16, 32, 64]
            per_stage = depth // 6
        else:
            raise ValueError(unsupported.format(num_layers))
        units = [per_stage] * num_stages
    else:
        # larger inputs: four stages with a fixed layout per supported depth
        num_stages = 4
        if num_layers >= 50:
            bottle_neck = True
            filter_list = [64, 256, 512, 1024, 2048]
        else:
            bottle_neck = False
            filter_list = [64, 64, 128, 256, 512]
        units_by_depth = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 24, 36, 3],
            269: [3, 30, 48, 8],
        }
        if num_layers not in units_by_depth:
            raise ValueError(unsupported.format(num_layers))
        units = units_by_depth[num_layers]

    return resnet(units=units,
                  num_stages=num_stages,
                  filter_list=filter_list,
                  num_classes=num_classes,
                  image_shape=image_shape,
                  bottle_neck=bottle_neck,
                  workspace=conv_workspace)
170 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import tools.find_mxnet
  3 | import mxnet as mx
  4 | import os
  5 | import sys
  6 | from train.train_net import train_net
  7 | 
  8 | 
  9 | def parse_args():
 10 |     parser = argparse.ArgumentParser(description='Train a Single-shot detection network')
 11 |     parser.add_argument('--train-path', dest='train_path', help='train record to use',
 12 |                         default=os.path.join(os.getcwd(), 'data', 'train.rec'), type=str)
 13 |     parser.add_argument('--train-list', dest='train_list', help='train list to use',
 14 |                         default="", type=str)
 15 |     parser.add_argument('--val-path', dest='val_path', help='validation record to use',
 16 |                         default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
 17 |     parser.add_argument('--val-list', dest='val_list', help='validation list to use',
 18 |                         default="", type=str)
 19 |     parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
 20 |                         help='which network to use')
 21 |     parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
 22 |                         help='training batch size')
 23 |     parser.add_argument('--resume', dest='resume', type=int, default=-1,
 24 |                         help='resume training from epoch n')
 25 |     parser.add_argument('--finetune', dest='finetune', type=int, default=-1,
 26 |                         help='finetune from epoch n, rename the model before doing this')
 27 |     parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
 28 |                         default=os.path.join(os.getcwd(), 'model', 'vgg16_reduced'), type=str)
 29 |     parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
 30 |                         default=1, type=int)
 31 |     parser.add_argument('--prefix', dest='prefix', help='new model prefix',
 32 |                         default=os.path.join(os.getcwd(), 'output', 'exp1', 'ssd'), type=str)
 33 |     parser.add_argument('--gpus', dest='gpus', help='GPU devices to train with',
 34 |                         default='0', type=str)
 35 |     parser.add_argument('--begin-epoch', dest='begin_epoch', help='begin epoch of training',
 36 |                         default=0, type=int)
 37 |     parser.add_argument('--end-epoch', dest='end_epoch', help='end epoch of training',
 38 |                         default=240, type=int)
 39 |     parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
 40 |                         default=20, type=int)
 41 |     parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
 42 |                         help='set image shape')
 43 |     parser.add_argument('--label-width', dest='label_width', type=int, default=350,
 44 |                         help='force padding label width to sync across train and validation')
 45 |     parser.add_argument('--optimizer', dest='optimizer', type=str, default='sgd',
 46 |                         help='Whether to use a different optimizer or follow the original code with sgd')
 47 |     parser.add_argument('--lr', dest='learning_rate', type=float, default=0.004,
 48 |                         help='learning rate')
 49 |     parser.add_argument('--momentum', dest='momentum', type=float, default=0.9,
 50 |                         help='momentum')
 51 |     parser.add_argument('--wd', dest='weight_decay', type=float, default=0.0005,
 52 |                         help='weight decay')
 53 |     parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
 54 |                         help='red mean value')
 55 |     parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
 56 |                         help='green mean value')
 57 |     parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
 58 |                         help='blue mean value')
 59 |     parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='80, 160',
 60 |                         help='refactor learning rate at specified epochs')
 61 |     parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=0.1,
 62 |                         help='ratio to refactor learning rate')
 63 |     parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="^(conv1_|conv2_).*",
 64 |                         help='freeze layer pattern')
 65 |     parser.add_argument('--log', dest='log_file', type=str, default="train.log",
 66 |                         help='save training log to file')
 67 |     parser.add_argument('--monitor', dest='monitor', type=int, default=0,
 68 |                         help='log network parameters every N iters if larger than 0')
 69 |     parser.add_argument('--pattern', dest='monitor_pattern', type=str, default=".*",
 70 |                         help='monitor parameter pattern, as regex')
 71 |     parser.add_argument('--num-class', dest='num_class', type=int, default=20,
 72 |                         help='number of classes')
 73 |     parser.add_argument('--num-example', dest='num_example', type=int, default=16551,
 74 |                         help='number of image examples')
 75 |     parser.add_argument('--class-names', dest='class_names', type=str,
 76 |                         default='aeroplane, bicycle, bird, boat, bottle, bus, \
 77 |                         car, cat, chair, cow, diningtable, dog, horse, motorbike, \
 78 |                         person, pottedplant, sheep, sofa, train, tvmonitor',
 79 |                         help='string of comma separated names, or text filename')
 80 |     parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.45,
 81 |                         help='non-maximum suppression threshold')
 82 |     parser.add_argument('--nms_topk', dest='nms_topk', type=int, default=400,
 83 |                         help='final number of detections')
 84 |     parser.add_argument('--overlap', dest='overlap_thresh', type=float, default=0.5,
 85 |                         help='evaluation overlap threshold')
 86 |     parser.add_argument('--force', dest='force_nms', type=bool, default=False,
 87 |                         help='force non-maximum suppression on different class')
 88 |     parser.add_argument('--use-difficult', dest='use_difficult', type=bool, default=False,
 89 |                         help='use difficult ground-truths in evaluation')
 90 |     parser.add_argument('--voc07', dest='use_voc07_metric', type=bool, default=True,
 91 |                         help='use PASCAL VOC 07 11-point metric')
 92 |     parser.add_argument('--tensorboard', dest='tensorboard', type=bool, default=False,
 93 |                         help='save metrics into tensorboard readable files')
 94 |     parser.add_argument('--min_neg_samples', dest='min_neg_samples', type=int, default=0,
 95 |                         help='min number of negative samples taken in hard mining.')
 96 | 
 97 |     args = parser.parse_args()
 98 |     return args
 99 | 
def parse_class_names(args):
    """Return the list of class names described by ``args.class_names``.

    ``args.class_names`` is either the path of an existing text file with
    one name per line, or a comma separated string of names. Surrounding
    whitespace is stripped from every name. The number of names must equal
    ``args.num_class`` and no name may be empty (both checked with assert).
    An empty ``args.class_names`` yields None.
    """
    num_class = args.num_class
    if len(args.class_names) == 0:
        return None

    if os.path.isfile(args.class_names):
        # a names file: one class per line
        with open(args.class_names, 'r') as names_file:
            class_names = [line.strip() for line in names_file]
    else:
        # an inline, comma separated list
        class_names = [piece.strip() for piece in args.class_names.split(',')]

    assert len(class_names) == num_class, str(len(class_names))
    for entry in class_names:
        assert len(entry) > 0
    return class_names
116 | 
if __name__ == '__main__':
    args = parse_args()

    # one GPU context per non-blank id listed in --gpus; fall back to the
    # CPU when the list turns out empty
    ctx = []
    for gpu_id in args.gpus.split(','):
        if gpu_id.strip():
            ctx.append(mx.gpu(int(gpu_id)))
    if not ctx:
        ctx = [mx.cpu()]

    # class names if applicable
    class_names = parse_class_names(args)

    # per-channel mean values, in red, green, blue order
    mean_pixels = [args.mean_r, args.mean_g, args.mean_b]

    # start training
    train_net(
        args.network,
        args.train_path,
        args.num_class,
        args.batch_size,
        args.data_shape,
        mean_pixels,
        args.resume,
        args.finetune,
        args.pretrained,
        args.epoch,
        args.prefix,
        ctx,
        args.begin_epoch,
        args.end_epoch,
        args.frequent,
        args.learning_rate,
        args.momentum,
        args.weight_decay,
        args.lr_refactor_step,
        args.lr_refactor_ratio,
        val_path=args.val_path,
        min_neg_samples=args.min_neg_samples,
        num_example=args.num_example,
        class_names=class_names,
        label_pad_width=args.label_width,
        freeze_layer_pattern=args.freeze_pattern,
        iter_monitor=args.monitor,
        monitor_pattern=args.monitor_pattern,
        log_file=args.log_file,
        nms_thresh=args.nms_thresh,
        nms_topk=args.nms_topk,
        force_nms=args.force_nms,
        ovp_thresh=args.overlap_thresh,
        use_difficult=args.use_difficult,
        voc07_metric=args.use_voc07_metric,
        optimizer=args.optimizer,
        tensorboard=args.tensorboard)
149 | 


--------------------------------------------------------------------------------
/symbol/densenet.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
  3 | Original author Wei Wu
  4 | Referenced https://github.com/bamos/densenet.pytorch/blob/master/densenet.py
  5 | Original author bamos
  6 | Referenced https://github.com/andreasveit/densenet-pytorch/blob/master/densenet.py
  7 | Original author andreasveit
  8 | Referenced https://github.com/Nicatio/Densenet/blob/master/mxnet/symbol_densenet.py
  9 | Original author Nicatio
 10 | 
 11 | Implemented the following paper:     DenseNet-BC
 12 | Gao Huang, Zhuang Liu, Kilian Q. Weinberger, Laurens van der Maaten. "Densely Connected Convolutional Networks"
 13 | 
 14 | Coded by Lin Xiong Mar-1, 2017
 15 | """
 16 | import mxnet as mx
 17 | import math
 18 | 
 19 | def BasicBlock(data, growth_rate, stride, name, bottle_neck=True, drop_out=0.0, bn_mom=0.9, workspace=512):
 20 |     """Return BaiscBlock Unit symbol for building DenseBlock
 21 |     Parameters
 22 |     ----------
 23 |     data : str
 24 |         Input data
 25 |     growth_rate : int
 26 |         Number of output channels
 27 |     stride : tupe
 28 |         Stride used in convolution
 29 |     drop_out : float
 30 |         Probability of an element to be zeroed. Default = 0.2
 31 |     name : str
 32 |         Base name of the operators
 33 |     workspace : int
 34 |         Workspace used in convolution operator
 35 |     """
 36 |     # import pdb
 37 |     # pdb.set_trace()
 38 | 
 39 |     if bottle_neck:
 40 |         # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
 41 |         bn1   = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
 42 |         act1  = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 43 |         conv1 = mx.sym.Convolution(data=act1, num_filter=int(growth_rate*4), kernel=(1,1), stride=(1,1), pad=(0,0),
 44 |                                       no_bias=True, workspace=workspace, name=name + '_conv1')
 45 |         if drop_out > 0:
 46 |             conv1 = mx.symbol.Dropout(data=conv1, p=drop_out, name=name + '_dp1')
 47 |         bn2   = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
 48 |         act2  = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
 49 |         conv2 = mx.sym.Convolution(data=act2, num_filter=int(growth_rate), kernel=(3,3), stride=stride, pad=(1,1),
 50 |                                       no_bias=True, workspace=workspace, name=name + '_conv2')
 51 |         if drop_out > 0:
 52 |             conv2 = mx.symbol.Dropout(data=conv2, p=drop_out, name=name + '_dp2')
 53 |         #return mx.symbol.Concat(data, conv2, name=name + '_concat0')
 54 |         return conv2
 55 |     else:
 56 |         bn1   = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
 57 |         act1  = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
 58 |         conv1 = mx.sym.Convolution(data=act1, num_filter=int(growth_rate), kernel=(3,3), stride=(1,1), pad=(1,1),
 59 |                                       no_bias=True, workspace=workspace, name=name + '_conv1')
 60 |         if drop_out > 0:
 61 |             conv1 = mx.symbol.Dropout(data=conv1, p=drop_out, name=name + '_dp1')
 62 |         #return mx.symbol.Concat(data, conv1, name=name + '_concat0')
 63 |         return conv1
 64 | 
 65 | def DenseBlock(units_num, data, growth_rate, name, bottle_neck=True, drop_out=0.0, bn_mom=0.9, workspace=512):
 66 |     """Return DenseBlock Unit symbol for building DenseNet
 67 |     Parameters
 68 |     ----------
 69 |     units_num : int
 70 |         the number of BasicBlock in each DenseBlock
 71 |     data : str
 72 |         Input data
 73 |     growth_rate : int
 74 |         Number of output channels
 75 |     drop_out : float
 76 |         Probability of an element to be zeroed. Default = 0.2
 77 |     workspace : int
 78 |         Workspace used in convolution operator
 79 |     """
 80 |     # import pdb
 81 |     # pdb.set_trace()
 82 | 
 83 |     for i in range(units_num):
 84 |         Block = BasicBlock(data, growth_rate=growth_rate, stride=(1,1), name=name + '_unit%d' % (i+1),
 85 |                             bottle_neck=bottle_neck, drop_out=drop_out,
 86 |                             bn_mom=bn_mom, workspace=workspace)
 87 |         data = mx.symbol.Concat(data, Block, name=name + '_concat%d' % (i+1))
 88 |     return data
 89 | 
 90 | def TransitionBlock(num_stage, data, num_filter, stride, name, drop_out=0.0, bn_mom=0.9, workspace=512):
 91 |     """Return TransitionBlock Unit symbol for building DenseNet
 92 |     Parameters
 93 |     ----------
 94 |     num_stage : int
 95 |         Number of stage
 96 |     data : str
 97 |         Input data
 98 |     num : int
 99 |         Number of output channels
100 |     stride : tupe
101 |         Stride used in convolution
102 |     name : str
103 |         Base name of the operators
104 |     drop_out : float
105 |         Probability of an element to be zeroed. Default = 0.2
106 |     workspace : int
107 |         Workspace used in convolution operator
108 |     """
109 |     bn1   = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
110 |     act1  = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
111 |     conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter,
112 |                                 kernel=(1,1), stride=stride, pad=(0,0), no_bias=True,
113 |                                 workspace=workspace, name=name + '_conv1')
114 |     if drop_out > 0:
115 |         conv1 = mx.symbol.Dropout(data=conv1, p=drop_out, name=name + '_dp1')
116 |     return mx.symbol.Pooling(conv1, global_pool=False, kernel=(2,2), stride=(2,2), pool_type='avg', name=name + '_pool%d' % (num_stage+1))
117 | 
def get_symbol(units, num_stage, growth_rate, num_classes, data_type, reduction=0.5, drop_out=0., bottle_neck=True, bn_mom=0.9, workspace=512, **kwargs):
    """Return the DenseNet classification symbol.

    Parameters
    ----------
    units : list
        Number of units in each stage; its length must equal `num_stage`
    num_stage : int
        Number of stage
    growth_rate : int
        Number of output channels of every BasicBlock
    num_classes : int
        Output size of symbol
    data_type : str
        the type of dataset; 'imagenet', 'vggface' and 'msface' are accepted
        and all three build the same stem
    reduction : float
        Compression ratio applied in the transition blocks. Default = 0.5
    drop_out : float
        Probability of an element to be zeroed. Default = 0.0 (no dropout)
    bottle_neck : bool
        Forwarded to the dense blocks
    bn_mom : float
        Momentum of the batch normalization layers
    workspace : int
        Workspace used in convolution operator

    Raises
    ------
    ValueError
        If `data_type` is not one of the accepted dataset names
    """
    assert(len(units) == num_stage)
    n_channels = 2 * growth_rate

    net_in = mx.sym.Variable(name='data')
    net_in = mx.sym.BatchNorm(data=net_in, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')

    if data_type not in ('imagenet', 'vggface', 'msface'):
        raise ValueError("do not support {} yet".format(data_type))

    # Stem shared by every accepted dataset: 7x7/2 conv -> BN -> ReLU -> 3x3/2 max pool.
    body = mx.sym.Convolution(data=net_in, num_filter=growth_rate*2, kernel=(7, 7), stride=(2,2), pad=(3, 3),
                              no_bias=True, name="conv0", workspace=workspace)
    body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
    body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
    body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')

    last = num_stage - 1
    for stage in range(last):
        stage_no = stage + 1
        body = DenseBlock(units[stage], body, growth_rate=growth_rate, name='DBstage%d' % stage_no,
                          bottle_neck=bottle_neck, drop_out=drop_out, bn_mom=bn_mom, workspace=workspace)
        # Channels after the dense block, then compressed by `reduction`.
        n_channels = int(math.floor((n_channels + units[stage]*growth_rate)*reduction))
        body = TransitionBlock(stage, body, n_channels, stride=(1,1), name='TBstage%d' % stage_no,
                               drop_out=drop_out, bn_mom=bn_mom, workspace=workspace)

    # The final dense block is not followed by a transition block.
    body = DenseBlock(units[last], body, growth_rate=growth_rate, name='DBstage%d' % (num_stage),
                      bottle_neck=bottle_neck, drop_out=drop_out, bn_mom=bn_mom, workspace=workspace)

    head = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
    head = mx.sym.Activation(data=head, act_type='relu', name='relu1')
    head = mx.symbol.Pooling(data=head, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
    head = mx.symbol.Flatten(data=head)
    logits = mx.symbol.FullyConnected(data=head, num_hidden=num_classes, name='fc1')
    return mx.symbol.SoftmaxOutput(data=logits, name='softmax')
177 | 


--------------------------------------------------------------------------------
/symbol/symbol_factory.py:
--------------------------------------------------------------------------------
  1 | """Presets for various network configurations"""
  2 | import logging
  3 | from symbol import symbol_builder
  4 | import numpy as np
  5 | 
  6 | def get_scales(min_scale=0.2, max_scale=0.9,num_layers=6):
  7 |     """ Following the ssd arxiv paper, regarding the calculation of scales & ratios
  8 | 
  9 |     Parameters
 10 |     ----------
 11 |     min_scale : float
 12 |     max_scales: float
 13 |     num_layers: int
 14 |         number of layers that will have a detection head
 15 |     anchor_ratios: list
 16 |     first_layer_ratios: list
 17 | 
 18 |     return
 19 |     ------
 20 |     sizes : list
 21 |         list of scale sizes per feature layer
 22 |     ratios : list
 23 |         list of anchor_ratios per feature layer
 24 |     """
 25 | 
 26 |     # this code follows the original implementation of wei liu
 27 |     # for more, look at ssd/score_ssd_pascal.py:310 in the original caffe implementation
 28 |     min_ratio = int(min_scale * 100)
 29 |     max_ratio = int(max_scale * 100)
 30 |     step = int(np.floor((max_ratio - min_ratio) / (num_layers - 2)))
 31 |     min_sizes = []
 32 |     max_sizes = []
 33 |     for ratio in xrange(min_ratio, max_ratio + 1, step):
 34 |         min_sizes.append(ratio / 100.)
 35 |         max_sizes.append((ratio + step) / 100.)
 36 |     min_sizes = [int(100*min_scale / 2.0) / 100.0] + min_sizes
 37 |     max_sizes = [min_scale] + max_sizes
 38 | 
 39 |     # convert it back to this implementation's notation:
 40 |     scales = []
 41 |     for layer_idx in range(num_layers):
 42 |         scales.append([min_sizes[layer_idx], np.single(np.sqrt(min_sizes[layer_idx] * max_sizes[layer_idx]))])
 43 |     return scales
 44 | 
 45 | def get_config(network, data_shape, **kwargs):
 46 |     """Configuration factory for various networks
 47 | 
 48 |     Parameters
 49 |     ----------
 50 |     network : str
 51 |         base network name, such as vgg_reduced, inceptionv3, resnet...
 52 |     data_shape : int
 53 |         input data dimension
 54 |     kwargs : dict
 55 |         extra arguments
 56 |     """
 57 |     if network == 'vgg16_reduced':
 58 |         if data_shape >= 448:
 59 |             from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
 60 |             num_filters = [512, -1, 512, 256, 256, 256, 256]
 61 |             strides = [-1, -1, 2, 2, 2, 2, 1]
 62 |             pads = [-1, -1, 1, 1, 1, 1, 1]
 63 |             sizes = get_scales(min_scale=0.15, max_scale=0.9, num_layers=len(from_layers))
 64 |             ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
 65 |                 [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
 66 |             normalizations = [20, -1, -1, -1, -1, -1, -1]
 67 |             steps = [] if data_shape != 512 else [x / 512.0 for x in
 68 |                 [8, 16, 32, 64, 128, 256, 512]]
 69 |         else:
 70 |             from_layers = ['relu4_3', 'relu7', '', '', '', '']
 71 |             num_filters = [512, -1, 512, 256, 256, 256]
 72 |             strides = [-1, -1, 2, 2, 1, 1]
 73 |             pads = [-1, -1, 1, 1, 0, 0]
 74 |             sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
 75 |             ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
 76 |                 [1,2,.5], [1,2,.5]]
 77 |             normalizations = [20, -1, -1, -1, -1, -1]
 78 |             steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
 79 |         if not (data_shape == 300 or data_shape == 512):
 80 |             logging.warn('data_shape %d was not tested, use with caucious.' % data_shape)
 81 |         return locals()
 82 |     elif network == 'inceptionv3':
 83 |         if data_shape >= 448:
 84 |             from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
 85 |             num_filters = [-1, -1, 512, 256, 256, 128]
 86 |             strides = [-1, -1, 2, 2, 2, 2]
 87 |             pads = [-1, -1, 1, 1, 1, 1]
 88 |             sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
 89 |             ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
 90 |                 [1,2,.5], [1,2,.5]]
 91 |             normalizations = -1
 92 |             steps = []
 93 |         else:
 94 |             from_layers = ['ch_concat_mixed_2_chconcat', 'ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '']
 95 |             num_filters = [-1, -1, -1, 256, 256, 128]
 96 |             strides = [-1, -1, -1, 2, 2, 2]
 97 |             pads = [-1, -1, -1, 1, 1, 1]
 98 |             sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
 99 |             ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
100 |                 [1,2,.5], [1,2,.5]]
101 |             normalizations = -1
102 |             steps = []
103 |         return locals()
104 |     elif network == 'resnet50':
105 |         num_layers = 50
106 |         image_shape = '3,224,224'  # resnet require it as shape check
107 |         network = 'resnet'
108 |         from_layers = ['_plus12', '_plus15', '', '', '', '']
109 |         num_filters = [-1, -1, 512, 256, 256, 128]
110 |         strides = [-1, -1, 2, 2, 2, 2]
111 |         pads = [-1, -1, 1, 1, 1, 1]
112 |         sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
113 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
114 |             [1,2,.5], [1,2,.5]]
115 |         normalizations = -1
116 |         steps = []
117 |         return locals()
118 |     elif network == 'resnet101':
119 |         num_layers = 101
120 |         image_shape = '3,224,224'
121 |         network = 'resnet'
122 |         from_layers = ['_plus29', '_plus32', '', '', '', '']
123 |         num_filters = [-1, -1, 512, 256, 256, 128]
124 |         strides = [-1, -1, 2, 2, 2, 2]
125 |         pads = [-1, -1, 1, 1, 1, 1]
126 |         sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
127 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
128 |             [1,2,.5], [1,2,.5]]
129 |         normalizations = -1
130 |         steps = []
131 |         return locals()
132 |     elif network == 'mobilenet':
133 |         from_layers = ['conv_12_relu', 'conv_14_relu', '', '', '', '', '']
134 |         num_filters = [-1, -1, 512, 256, 256, 256, 256]
135 |         strides = [-1, -1, 2, 2, 2, 2, 2]
136 |         pads = [-1, -1, 1, 1, 1, 1, 1]
137 |         sizes = get_scales(min_scale=0.15, max_scale=0.9, num_layers=len(from_layers))
138 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
139 |                   [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
140 |         normalizations = -1
141 |         steps = []
142 |         return locals()
143 |     elif network == 'densenet121':
144 |         network = 'densenet'
145 |         data_type = 'imagenet'
146 |         units = [6, 12, 24, 16]
147 |         num_stage = 4
148 |         growth_rate = 32
149 |         bottle_neck = True
150 |         from_layers = ['DBstage3_concat24', 'DBstage4_concat16', '', '', '', '']
151 |         num_filters = [-1, -1, 256, 256, 256, 128]
152 |         strides = [-1, -1, 2, 2, 2, 2]
153 |         pads = [-1, -1, 1, 1, 1, 1]
154 |         sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
155 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
156 |             [1,2,.5], [1,2,.5]]
157 |         normalizations = -1
158 |         steps = []
159 |         return locals()
160 |     elif network == 'densenet-tiny':
161 |         network = 'densenet'
162 |         data_type = 'imagenet'
163 |         units = [6, 12, 18, 12]
164 |         num_stage = 4
165 |         growth_rate = 16
166 |         bottle_neck = True
167 |         from_layers = ['DBstage2_concat12', 'DBstage3_concat18', '', '', '', '']
168 |         num_filters = [-1, -1, 256, 256, 256, 128]
169 |         strides = [-1, -1, 2, 2, 2, 2]
170 |         pads = [-1, -1, 1, 1, 1, 1]
171 |         sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
172 |         ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
173 |             [1,2,.5], [1,2,.5]]
174 |         normalizations = -1
175 |         steps = []
176 |         return locals()
177 |     else:
178 |         msg = 'No configuration found for %s with data_shape %d' % (network, data_shape)
179 |         raise NotImplementedError(msg)
180 | 
def get_symbol_train(network, data_shape, **kwargs):
    """Wrapper for get symbol for train

    Networks whose name starts with 'legacy' are loaded as a module through
    symbol_builder.import_module and built with `kwargs` only. Any other
    network is configured through get_config, with `kwargs` overriding the
    preset values.

    Parameters
    ----------
    network : str
        name for the base network symbol
    data_shape : int
        input shape
    kwargs : dict
        see symbol_builder.get_symbol_train for more details
    """
    if network.startswith('legacy'):
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning('Using legacy model.')
        return symbol_builder.import_module(network).get_symbol_train(**kwargs)
    config = get_config(network, data_shape, **kwargs).copy()
    # Caller-supplied arguments take precedence over the preset.
    config.update(kwargs)
    return symbol_builder.get_symbol_train(**config)
199 | 
def get_symbol(network, data_shape, **kwargs):
    """Wrapper for get symbol for test

    Networks whose name starts with 'legacy' are loaded as a module through
    symbol_builder.import_module and built with `kwargs` only. Any other
    network is configured through get_config, with `kwargs` overriding the
    preset values.

    Parameters
    ----------
    network : str
        name for the base network symbol
    data_shape : int
        input shape
    kwargs : dict
        see symbol_builder.get_symbol for more details
    """
    if network.startswith('legacy'):
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning('Using legacy model.')
        return symbol_builder.import_module(network).get_symbol(**kwargs)
    config = get_config(network, data_shape, **kwargs).copy()
    # Caller-supplied arguments take precedence over the preset.
    config.update(kwargs)
    return symbol_builder.get_symbol(**config)
218 | 


--------------------------------------------------------------------------------
/symbol/legacy_vgg16_ssd_300.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import mxnet as mx
  3 | from .common import legacy_conv_act_layer
  4 | from .common import multibox_layer
  5 | 
def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400, **kwargs):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is a training network with losses

    The returned symbol groups four outputs, in this order: the class
    probabilities (cls_prob), the localization loss (loc_loss), the class
    targets (cls_label) and the detections (det_out).

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    kwargs : dict
        accepted but not used in this function

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1
    conv1_1 = mx.symbol.Convolution(
        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    conv1_2 = mx.symbol.Convolution(
        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
    pool1 = mx.symbol.Pooling(
        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    conv2_1 = mx.symbol.Convolution(
        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    conv2_2 = mx.symbol.Convolution(
        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
    pool2 = mx.symbol.Pooling(
        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3
    conv3_1 = mx.symbol.Convolution(
        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mx.symbol.Convolution(
        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    conv3_3 = mx.symbol.Convolution(
        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
    # pool3 is the only pooling layer here that uses pooling_convention="full"
    pool3 = mx.symbol.Pooling(
        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
        pooling_convention="full", name="pool3")
    # group 4
    conv4_1 = mx.symbol.Convolution(
        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mx.symbol.Convolution(
        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    conv4_3 = mx.symbol.Convolution(
        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
    pool4 = mx.symbol.Pooling(
        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5
    conv5_1 = mx.symbol.Convolution(
        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mx.symbol.Convolution(
        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    conv5_3 = mx.symbol.Convolution(
        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
    # pool5 uses a 3x3 kernel with stride 1 and padding 1 instead of 2x2 / stride 2
    pool5 = mx.symbol.Pooling(
        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
        pad=(1,1), name="pool5")
    # group 6: 3x3 convolution with dilation 6 (replaces fc6, see docstring)
    conv6 = mx.symbol.Convolution(
        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
        num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
    # group 7: 1x1 convolution (replaces fc7, see docstring)
    conv7 = mx.symbol.Convolution(
        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")

    ### ssd extra layers ###
    # Each pair is a 1x1 convolution followed by a 3x3 convolution. The 8_2
    # and 9_2 layers use stride 2 with padding 1; 10_2 and 11_2 use stride 1
    # with no padding.
    conv8_1, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv8_2, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv9_1, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv9_2, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv10_1, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv10_2, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv11_1, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv11_2, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)

    # specific parameters for VGG16 network
    # One entry per layer in from_layers (six detection layers).
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
    sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
        [1,2,.5], [1,2,.5]]
    # Only the first layer (relu4_3) has a normalization value; -1 for the rest
    # presumably disables it - confirm against multibox_layer.
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [ x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)

    # MultiBoxTarget yields three outputs, unpacked by index below.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    # Smooth-L1 on the masked difference between predicted and target offsets.
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status
    # Both outputs below are wrapped in MakeLoss with grad_scale=0.
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
158 | 
def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False,
               nms_topk=400, **kwargs):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is the detection network

    The training symbol is built first; its class predictions, location
    predictions and anchors are then looked up among its internals and fed
    into a softmax followed by MultiBoxDetection.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        threshold of overlap for non-maximum suppression
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    internals = get_symbol_train(num_classes).get_internals()
    class_scores = internals["multibox_cls_pred_output"]
    box_offsets = internals["multibox_loc_pred_output"]
    anchors = internals["multibox_anchors_output"]

    class_probs = mx.symbol.SoftmaxActivation(data=class_scores, mode='channel',
                                              name='cls_prob')
    return mx.contrib.symbol.MultiBoxDetection(
        class_probs, box_offsets, anchors,
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
193 | 


--------------------------------------------------------------------------------
/symbol/inceptionv3.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Inception V3, suitable for images with around 299 x 299
  3 | 
  4 | Reference:
  5 | 
  6 | Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
  7 | """
  8 | import mxnet as mx
  9 | 
 10 | def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''):
 11 |     conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix))
 12 |     bn = mx.sym.BatchNorm(data=conv, name='%s%s_batchnorm' %(name, suffix), fix_gamma=True)
 13 |     act = mx.sym.Activation(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix))
 14 |     return act
 15 | 
 16 | 
 17 | def Inception7A(data,
 18 |                 num_1x1,
 19 |                 num_3x3_red, num_3x3_1, num_3x3_2,
 20 |                 num_5x5_red, num_5x5,
 21 |                 pool, proj,
 22 |                 name):
 23 |     tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name))
 24 |     tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv')
 25 |     tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
 26 |     tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
 27 |     tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
 28 |     tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2')
 29 |     pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
 30 |     cproj = Conv(pooling, proj, name=('%s_tower_2' %  name), suffix='_conv')
 31 |     concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name)
 32 |     return concat
 33 | 
 34 | # First Downsample
 35 | def Inception7B(data,
 36 |                 num_3x3,
 37 |                 num_d3x3_red, num_d3x3_1, num_d3x3_2,
 38 |                 pool,
 39 |                 name):
 40 |     tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name))
 41 |     tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv')
 42 |     tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1')
 43 |     tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2')
 44 |     pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name))
 45 |     concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name)
 46 |     return concat
 47 | 
 48 | def Inception7C(data,
 49 |                 num_1x1,
 50 |                 num_d7_red, num_d7_1, num_d7_2,
 51 |                 num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4,
 52 |                 pool, proj,
 53 |                 name):
 54 |     tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
 55 |     tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv')
 56 |     tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1')
 57 |     tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2')
 58 |     tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv')
 59 |     tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1')
 60 |     tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2')
 61 |     tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3')
 62 |     tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_4')
 63 |     pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
 64 |     cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' %  name), suffix='_conv')
 65 |     # concat
 66 |     concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name)
 67 |     return concat
 68 | 
 69 | def Inception7D(data,
 70 |                 num_3x3_red, num_3x3,
 71 |                 num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3,
 72 |                 pool,
 73 |                 name):
 74 |     tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv')
 75 |     tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
 76 |     tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
 77 |     tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1')
 78 |     tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2')
 79 |     tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3')
 80 |     pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
 81 |     # concat
 82 |     concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name)
 83 |     return concat
 84 | 
 85 | def Inception7E(data,
 86 |                 num_1x1,
 87 |                 num_d3_red, num_d3_1, num_d3_2,
 88 |                 num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2,
 89 |                 pool, proj,
 90 |                 name):
 91 |     tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
 92 |     tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv')
 93 |     tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv')
 94 |     tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1')
 95 |     tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv')
 96 |     tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
 97 |     tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv')
 98 |     tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1')
 99 |     pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
100 |     cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' %  name), suffix='_conv')
101 |     # concat
102 |     concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name)
103 |     return concat
104 | 
105 | # In[49]:
106 | 
def get_symbol(num_classes=1000, **kwargs):
    """
    Assemble the full classification network from the Conv stem and the
    Inception7A..7E blocks defined in this file.

    Layout: five stem Convs with two max poolings, three Inception7A
    blocks, one Inception7B, four Inception7C, one Inception7D, two
    Inception7E, an 8x8 average pooling, flatten, a fully connected
    layer and a SoftmaxOutput.

    NOTE(review): the fixed 8x8 kernel of "global_pool" presumably
    matches the feature-map size of a 299x299 input -- confirm.

    Parameters:
    ----------
    num_classes : int
        number of hidden units of the final fully connected layer
    **kwargs
        accepted for interface compatibility, not used

    Returns:
    ----------
    mx.Symbol
    """
    net = mx.symbol.Variable(name="data")

    # stage 1
    net = Conv(net, 32, kernel=(3, 3), stride=(2, 2), name="conv")
    net = Conv(net, 32, kernel=(3, 3), name="conv_1")
    net = Conv(net, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2),
                         pool_type="max", name="pool")

    # stage 2
    net = Conv(net, 80, kernel=(1, 1), name="conv_3")
    net = Conv(net, 192, kernel=(3, 3), name="conv_4")
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2),
                         pool_type="max", name="pool1")

    # stage 3: the three 7A blocks differ only in projection width and name
    for proj, block_name in ((32, "mixed"), (64, "mixed_1"), (64, "mixed_2")):
        net = Inception7A(net, 64,
                          64, 96, 96,
                          48, 64,
                          "avg", proj, block_name)
    net = Inception7B(net, 384,
                      64, 96, 96,
                      "max", "mixed_3")

    # stage 4: the four 7C blocks differ only in inner tower width and name
    for width, block_name in ((128, "mixed_4"), (160, "mixed_5"),
                              (160, "mixed_6"), (192, "mixed_7")):
        net = Inception7C(net, 192,
                          width, width, 192,
                          width, width, width, width, 192,
                          "avg", 192, block_name)
    net = Inception7D(net, 192, 320,
                      192, 192, 192, 192,
                      "max", "mixed_8")

    # stage 5: the two 7E blocks differ only in pooling type and name
    for pool_type, block_name in (("avg", "mixed_9"), ("max", "mixed_10")):
        net = Inception7E(net, 320,
                          384, 384, 384,
                          448, 384, 384, 384,
                          pool_type, 192, block_name)

    # classifier head
    net = mx.sym.Pooling(data=net, kernel=(8, 8), stride=(1, 1),
                         pool_type="avg", name="global_pool")
    net = mx.sym.Flatten(data=net, name="flatten")
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    return mx.symbol.SoftmaxOutput(data=net, name='softmax')
169 | 


--------------------------------------------------------------------------------
/symbol/legacy_vgg16_ssd_512.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import mxnet as mx
  3 | from .common import legacy_conv_act_layer
  4 | from .common import multibox_layer
  5 | 
  6 | def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
  7 |     """
  8 |     Single-shot multi-box detection with VGG 16 layers ConvNet
  9 |     This is a modified version, with fc6/fc7 layers replaced by conv layers
 10 |     And the network is slightly smaller than original VGG 16 network
 11 |     This is a training network with losses
 12 | 
 13 |     Parameters:
 14 |     ----------
 15 |     num_classes: int
 16 |         number of object classes not including background
 17 |     nms_thresh : float
 18 |         non-maximum suppression threshold
 19 |     force_suppress : boolean
 20 |         whether suppress different class objects
 21 |     nms_topk : int
 22 |         apply NMS to top K detections
 23 | 
 24 |     Returns:
 25 |     ----------
 26 |     mx.Symbol
 27 |     """
 28 |     data = mx.symbol.Variable(name="data")
 29 |     label = mx.symbol.Variable(name="label")
 30 | 
 31 |     # group 1
 32 |     conv1_1 = mx.symbol.Convolution(
 33 |         data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
 34 |     relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
 35 |     conv1_2 = mx.symbol.Convolution(
 36 |         data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
 37 |     relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
 38 |     pool1 = mx.symbol.Pooling(
 39 |         data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
 40 |     # group 2
 41 |     conv2_1 = mx.symbol.Convolution(
 42 |         data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
 43 |     relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
 44 |     conv2_2 = mx.symbol.Convolution(
 45 |         data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
 46 |     relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
 47 |     pool2 = mx.symbol.Pooling(
 48 |         data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
 49 |     # group 3
 50 |     conv3_1 = mx.symbol.Convolution(
 51 |         data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
 52 |     relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
 53 |     conv3_2 = mx.symbol.Convolution(
 54 |         data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
 55 |     relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
 56 |     conv3_3 = mx.symbol.Convolution(
 57 |         data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
 58 |     relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
 59 |     pool3 = mx.symbol.Pooling(
 60 |         data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
 61 |         pooling_convention="full", name="pool3")
 62 |     # group 4
 63 |     conv4_1 = mx.symbol.Convolution(
 64 |         data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
 65 |     relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
 66 |     conv4_2 = mx.symbol.Convolution(
 67 |         data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
 68 |     relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
 69 |     conv4_3 = mx.symbol.Convolution(
 70 |         data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
 71 |     relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
 72 |     pool4 = mx.symbol.Pooling(
 73 |         data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
 74 |     # group 5
 75 |     conv5_1 = mx.symbol.Convolution(
 76 |         data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
 77 |     relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
 78 |     conv5_2 = mx.symbol.Convolution(
 79 |         data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
 80 |     relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
 81 |     conv5_3 = mx.symbol.Convolution(
 82 |         data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
 83 |     relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
 84 |     pool5 = mx.symbol.Pooling(
 85 |         data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
 86 |         pad=(1,1), name="pool5")
 87 |     # group 6
 88 |     conv6 = mx.symbol.Convolution(
 89 |         data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
 90 |         num_filter=1024, name="conv6")
 91 |     relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
 92 |     # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
 93 |     # group 7
 94 |     conv7 = mx.symbol.Convolution(
 95 |         data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
 96 |     relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
 97 |     # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
 98 | 
 99 |     ### ssd extra layers ###
100 |     conv8_1, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
101 |         stride=(1,1), act_type="relu", use_batchnorm=False)
102 |     conv8_2, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
103 |         stride=(2,2), act_type="relu", use_batchnorm=False)
104 |     conv9_1, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
105 |         stride=(1,1), act_type="relu", use_batchnorm=False)
106 |     conv9_2, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
107 |         stride=(2,2), act_type="relu", use_batchnorm=False)
108 |     conv10_1, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
109 |         stride=(1,1), act_type="relu", use_batchnorm=False)
110 |     conv10_2, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
111 |         stride=(2,2), act_type="relu", use_batchnorm=False)
112 |     conv11_1, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
113 |         stride=(1,1), act_type="relu", use_batchnorm=False)
114 |     conv11_2, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(1,1), \
115 |         stride=(2,2), act_type="relu", use_batchnorm=False)
116 |     conv12_1, relu12_1 = legacy_conv_act_layer(relu11_2, "12_1", 128, kernel=(1,1), pad=(0,0), \
117 |         stride=(1,1), act_type="relu", use_batchnorm=False)
118 |     conv12_2, relu12_2 = legacy_conv_act_layer(relu12_1, "12_2", 256, kernel=(4,4), pad=(1,1), \
119 |         stride=(1,1), act_type="relu", use_batchnorm=False)
120 | 
121 |     # specific parameters for VGG16 network
122 |     from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2, relu12_2]
123 |     sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
124 |         [.75, .8216], [.9, .9721]]
125 |     ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
126 |         [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
127 |     normalizations = [20, -1, -1, -1, -1, -1, -1]
128 |     steps = [ x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
129 |     num_channels = [512]
130 | 
131 |     loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
132 |         num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
133 |         num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
134 | 
135 |     tmp = mx.contrib.symbol.MultiBoxTarget(
136 |         *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
137 |         ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
138 |         negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
139 |         name="multibox_target")
140 |     loc_target = tmp[0]
141 |     loc_target_mask = tmp[1]
142 |     cls_target = tmp[2]
143 | 
144 |     cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
145 |         ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
146 |         normalization='valid', name="cls_prob")
147 |     loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
148 |         data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
149 |     loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
150 |         normalization='valid', name="loc_loss")
151 | 
152 |     # monitoring training status
153 |     cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
154 |     det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
155 |         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
156 |         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
157 |     det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
158 | 
159 |     # group output
160 |     out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
161 |     return out
162 | 
def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is the detection network

    The training symbol is built only to look up its internal multibox
    outputs; the NMS arguments are applied to the detection layer created
    here and are not forwarded to get_symbol_train.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        threshold of overlap for non-maximum suppression
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    internals = get_symbol_train(num_classes).get_internals()
    class_scores = internals["multibox_cls_pred_output"]
    box_offsets = internals["multibox_loc_pred_output"]
    anchors = internals["multibox_anchors_output"]

    # inference-time replacement of the training SoftmaxOutput
    cls_prob = mx.symbol.SoftmaxActivation(
        data=class_scores, mode='channel', name='cls_prob')
    return mx.contrib.symbol.MultiBoxDetection(
        cls_prob, box_offsets, anchors, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
196 | 


--------------------------------------------------------------------------------
/tools/rand_sampler.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import math
  3 | 
  4 | class RandSampler(object):
  5 |     """
  6 |     Random sampler base class, used for data augmentation
  7 | 
  8 |     Parameters:
  9 |     ----------
 10 |     max_trials : int
 11 |         maximum trials, if exceed this number, give up anyway
 12 |     max_sample : int
 13 |         maximum random crop samples to be generated
 14 |     """
 15 |     def __init__(self, max_trials, max_sample):
 16 |         assert max_trials > 0
 17 |         self.max_trials = int(max_trials)
 18 |         assert max_sample >= 0
 19 |         self.max_sample = int(max_sample)
 20 | 
 21 |     def sample(self, label):
 22 |         """
 23 |         Interface for calling sampling function
 24 | 
 25 |         Parameters:
 26 |         ----------
 27 |         label : numpy.array (n x 5 matrix)
 28 |             ground-truths
 29 | 
 30 |         Returns:
 31 |         ----------
 32 |         list of (crop_box, label) tuples, if failed, return empty list []
 33 |         """
 34 |         return NotImplementedError
 35 | 
 36 | 
 37 | class RandCropper(RandSampler):
 38 |     """
 39 |     Random cropping original images with various settings
 40 | 
 41 |     Parameters:
 42 |     ----------
 43 |     min_scale : float
 44 |         minimum crop scale, (0, 1]
 45 |     max_scale : float
 46 |         maximum crop scale, (0, 1], must larger than min_scale
 47 |     min_aspect_ratio : float
 48 |         minimum crop aspect ratio, (0, 1]
 49 |     max_aspect_ratio : float
 50 |         maximum crop aspect ratio, [1, inf)
 51 |     min_overlap : float
 52 |         hreshold of minimum overlap between a rand crop and any gt
 53 |     max_trials : int
 54 |         maximum trials, if exceed this number, give up anyway
 55 |     max_sample : int
 56 |         maximum random crop samples to be generated
 57 |     """
 58 |     def __init__(self, min_scale=1., max_scale=1.,
 59 |                  min_aspect_ratio=1., max_aspect_ratio=1.,
 60 |                  min_overlap=0., max_trials=50, max_sample=1):
 61 |         super(RandCropper, self).__init__(max_trials, max_sample)
 62 |         assert min_scale <= max_scale, "min_scale must <= max_scale"
 63 |         assert 0 < min_scale and min_scale <= 1, "min_scale must in (0, 1]"
 64 |         assert 0 < max_scale and max_scale <= 1, "max_scale must in (0, 1]"
 65 |         self.min_scale = min_scale
 66 |         self.max_scale = max_scale
 67 |         assert 0 < min_aspect_ratio and min_aspect_ratio <= 1, "min_ratio must in (0, 1]"
 68 |         assert 1 <= max_aspect_ratio , "max_ratio must >= 1"
 69 |         self.min_aspect_ratio = min_aspect_ratio
 70 |         self.max_aspect_ratio = max_aspect_ratio
 71 |         assert 0 <= min_overlap and min_overlap <= 1, "min_overlap must in [0,1]"
 72 |         self.min_overlap = min_overlap
 73 | 
 74 |         self.config = {'gt_constraint' : 'center'}
 75 | 
 76 |     def sample(self, label):
 77 |         """
 78 |         generate random cropping boxes according to parameters
 79 |         if satifactory crops generated, apply to ground-truth as well
 80 | 
 81 |         Parameters:
 82 |         ----------
 83 |         label : numpy.array (n x 5 matrix)
 84 |             ground-truths
 85 | 
 86 |         Returns:
 87 |         ----------
 88 |         list of (crop_box, label) tuples, if failed, return empty list []
 89 |         """
 90 |         samples = []
 91 |         count = 0
 92 |         for trial in range(self.max_trials):
 93 |             if count >= self.max_sample:
 94 |                 return samples
 95 |             scale = np.random.uniform(self.min_scale, self.max_scale)
 96 |             min_ratio = max(self.min_aspect_ratio, scale * scale)
 97 |             max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
 98 |             ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
 99 |             width = scale * ratio
100 |             height = scale / ratio
101 |             left = np.random.uniform(0., 1 - width)
102 |             top = np.random.uniform(0., 1 - height)
103 |             rand_box = (left, top, left + width, top + height)
104 |             valid_mask = np.where(label[:, 0] > -1)[0]
105 |             gt = label[valid_mask, :]
106 |             ious = self._check_satisfy(rand_box, gt)
107 |             if ious is not None:
108 |                 # transform gt labels after crop, discard bad ones
109 |                 l, t, r, b = rand_box
110 |                 new_gt_boxes = []
111 |                 new_width = r - l
112 |                 new_height = b - t
113 |                 for i in range(valid_mask.size):
114 |                     if ious[i] > 0:
115 |                         xmin = max(0., (gt[i, 1] - l) / new_width)
116 |                         ymin = max(0., (gt[i, 2] - t) / new_height)
117 |                         xmax = min(1., (gt[i, 3] - l) / new_width)
118 |                         ymax = min(1., (gt[i, 4] - t) / new_height)
119 |                         new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
120 |                 if not new_gt_boxes:
121 |                     continue
122 |                 new_gt_boxes = np.array(new_gt_boxes)
123 |                 label = np.lib.pad(new_gt_boxes,
124 |                     ((0, label.shape[0]-new_gt_boxes.shape[0]), (0,0)), \
125 |                     'constant', constant_values=(-1, -1))
126 |                 samples.append((rand_box, label))
127 |                 count += 1
128 |         return samples
129 | 
130 |     def _check_satisfy(self, rand_box, gt_boxes):
131 |         """
132 |         check if overlap with any gt box is larger than threshold
133 |         """
134 |         l, t, r, b = rand_box
135 |         num_gt = gt_boxes.shape[0]
136 |         ls = np.ones(num_gt) * l
137 |         ts = np.ones(num_gt) * t
138 |         rs = np.ones(num_gt) * r
139 |         bs = np.ones(num_gt) * b
140 |         mask = np.where(ls < gt_boxes[:, 1])[0]
141 |         ls[mask] = gt_boxes[mask, 1]
142 |         mask = np.where(ts < gt_boxes[:, 2])[0]
143 |         ts[mask] = gt_boxes[mask, 2]
144 |         mask = np.where(rs > gt_boxes[:, 3])[0]
145 |         rs[mask] = gt_boxes[mask, 3]
146 |         mask = np.where(bs > gt_boxes[:, 4])[0]
147 |         bs[mask] = gt_boxes[mask, 4]
148 |         w = rs - ls
149 |         w[w < 0] = 0
150 |         h = bs - ts
151 |         h[h < 0] = 0
152 |         inter_area = h * w
153 |         union_area = np.ones(num_gt) * max(0, r - l) * max(0, b - t)
154 |         union_area += (gt_boxes[:, 3] - gt_boxes[:, 1]) * (gt_boxes[:, 4] - gt_boxes[:, 2])
155 |         union_area -= inter_area
156 |         ious = inter_area / union_area
157 |         ious[union_area <= 0] = 0
158 |         max_iou = np.amax(ious)
159 |         if max_iou < self.min_overlap:
160 |             return None
161 |         # check ground-truth constraint
162 |         if self.config['gt_constraint'] == 'center':
163 |             for i in range(ious.shape[0]):
164 |                 if ious[i] > 0:
165 |                     gt_x = (gt_boxes[i, 1] + gt_boxes[i, 3]) / 2.0
166 |                     gt_y = (gt_boxes[i, 2] + gt_boxes[i, 4]) / 2.0
167 |                     if gt_x < l or gt_x > r or gt_y < t or gt_y > b:
168 |                         return None
169 |         elif self.config['gt_constraint'] == 'corner':
170 |             for i in range(ious.shape[0]):
171 |                 if ious[i] > 0:
172 |                     if gt_boxes[i, 1] < l or gt_boxes[i, 3] > r \
173 |                         or gt_boxes[i, 2] < t or gt_boxes[i, 4] > b:
174 |                         return None
175 |         return ious
176 | 
177 | 
class RandPadder(RandSampler):
    """
    Random padding original images with various settings

    Parameters:
    ----------
    min_scale : float
        minimum padding scale, [1, inf)
    max_scale : float
        maximum padding scale, [1, inf), must be no smaller than min_scale
    min_aspect_ratio : float
        minimum aspect ratio of the padded canvas, (0, 1]
    max_aspect_ratio : float
        maximum aspect ratio of the padded canvas, [1, inf)
    min_gt_scale : float
        minimum ground-truth scale to be satisfied after padding,
        either width or height, [0, 1]
    max_trials : int
        maximum trials, if exceed this number, give up anyway
    max_sample : int
        maximum random padding samples to be generated
    """
    def __init__(self, min_scale=1., max_scale=1., min_aspect_ratio=1.,
                 max_aspect_ratio=1., min_gt_scale=.01, max_trials=50,
                 max_sample=1):
        super(RandPadder, self).__init__(max_trials, max_sample)
        assert min_scale <= max_scale, "min_scale must <= max_scale"
        assert min_scale >= 1, "min_scale must in [1, inf)"
        self.min_scale = min_scale
        self.max_scale = max_scale
        assert 0 < min_aspect_ratio and min_aspect_ratio <= 1, "min_ratio must in (0, 1]"
        assert 1 <= max_aspect_ratio , "max_ratio must >= 1"
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        assert 0 <= min_gt_scale and min_gt_scale <= 1, "min_gt_scale must in [0, 1]"
        self.min_gt_scale = min_gt_scale

    def sample(self, label):
        """
        generate random padding boxes according to parameters
        if satisfactory padding generated, apply to ground-truth as well

        Every sample is computed from the ground-truths passed in by the
        caller; the input array is not modified.

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths, rows whose first column is <= -1 are padding

        Returns:
        ----------
        list of (crop_box, label) tuples, if failed, return empty list []
        crop_box is (left, top, right, bottom) of the padded canvas in
        coordinates of the original image, so left/top <= 0 and
        right/bottom >= 1
        """
        samples = []
        # The valid ground-truths do not depend on the trial, so extract them
        # once. They must always come from the ORIGINAL label: each sample is
        # an independent padding of the same image.
        valid_mask = np.where(label[:, 0] > -1)[0]
        gt = label[valid_mask, :]
        num_rows = label.shape[0]
        for _ in range(self.max_trials):
            if len(samples) >= self.max_sample:
                break
            scale = np.random.uniform(self.min_scale, self.max_scale)
            # With scale >= 1, width = scale * ratio and height = scale / ratio
            # are both >= 1 only when ratio**2 lies in [1/scale^2, scale^2].
            # Intersect that interval with the configured aspect-ratio range.
            min_ratio = max(self.min_aspect_ratio, 1. / scale / scale)
            max_ratio = min(self.max_aspect_ratio, scale * scale)
            ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
            width = scale * ratio
            if width < 1:
                # guard against floating point rounding at the interval edge
                continue
            height = scale / ratio
            if height < 1:
                continue
            # 1 - width <= 0, so the canvas origin is at or before the image
            left = np.random.uniform(0., 1 - width)
            top = np.random.uniform(0., 1 - height)
            right = left + width
            bot = top + height
            rand_box = (left, top, right, bot)
            new_gt_boxes = []
            for i in range(gt.shape[0]):
                xmin = (gt[i, 1] - left) / width
                ymin = (gt[i, 2] - top) / height
                xmax = (gt[i, 3] - left) / width
                ymax = (gt[i, 4] - top) / height
                new_size = min(xmax - xmin, ymax - ymin)
                if new_size < self.min_gt_scale:
                    # one gt became too small: reject the whole trial
                    new_gt_boxes = []
                    break
                new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
            if not new_gt_boxes:
                continue
            new_gt_boxes = np.array(new_gt_boxes)
            # pad back to the input row count with -1 rows
            padded_label = np.pad(
                new_gt_boxes,
                ((0, num_rows - new_gt_boxes.shape[0]), (0, 0)),
                'constant', constant_values=(-1, -1))
            samples.append((rand_box, padded_label))
        return samples
271 | 


--------------------------------------------------------------------------------
/evaluate/custom_callbacks.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import scipy.misc
  4 | import numpy as np
  5 | import random
  6 | import matplotlib.pyplot as plt
  7 | 
  8 | class ParseLogCallback(object):
  9 |     """
 10 |     1. log distribution's std to tensorboard (as distribution)
 11 |     This function make use of mxnet's "monitor" module, and it's output to a log file.
 12 |     while training, it is possible to specify layers to be monitored.
 13 |     these layers will be printed to a given log file,
 14 |     their values are computed **asynchronously**.
 15 | 
 16 |     2. log training loss to tensorboard (as scalar)
 17 | 
 18 |     Currently - does not support resume training..
 19 |     """
 20 |     def __init__(self, dist_logging_dir=None, scalar_logging_dir=None,
 21 |                  logfile_path=None, batch_size=None, iter_monitor=0,
 22 |                  frequent=None, prefix='ssd'):
 23 |         self.scalar_logging_dir = scalar_logging_dir
 24 |         self.dist_logging_dir = dist_logging_dir
 25 |         self.logfile_path = logfile_path
 26 |         self.batch_size = batch_size
 27 |         self.iter_monitor = iter_monitor
 28 |         self.frequent = frequent
 29 |         self.prefix = prefix
 30 |         self.batch = 0
 31 |         self.line_idx = 0
 32 |         try:
 33 |             from tensorboard import SummaryWriter
 34 |             self.dist_summary_writer = SummaryWriter(dist_logging_dir)
 35 |             self.scalar_summary_writer = SummaryWriter(scalar_logging_dir)
 36 |         except ImportError:
 37 |             logging.error('You can install tensorboard via `pip install tensorboard`.')
 38 | 
 39 |     def __call__(self, param):
 40 |         """Callback to parse a log file and and add params to TensorBoard."""
 41 | 
 42 |         # save distributions from the monitor output log
 43 |         if not self.iter_monitor == 0 and self.batch % self.iter_monitor == 0:
 44 |             with open(self.logfile_path) as fp:
 45 |                 for i in range(self.line_idx):
 46 |                     fp.next()
 47 |                 for line in fp:
 48 |                     if line.startswith('Batch'):
 49 |                         line = line.split(' ')
 50 |                         line = [x for x in line if x]
 51 |                         layer_name = line[2]
 52 |                         layer_value = np.array(float(line[3].split('\t')[0])).flatten()
 53 |                         if np.isfinite(layer_value):
 54 |                             self.dist_summary_writer.add_histogram(layer_name, layer_value)
 55 |                     self.line_idx += 1
 56 | 
 57 |         # save training loss
 58 |         if self.batch % self.frequent == 0:
 59 |             if param.eval_metric is None:
 60 |                 return
 61 |             name_value = param.eval_metric.get_name_value()
 62 |             for name, value in name_value:
 63 |                 if self.prefix is not None:
 64 |                     name = '%s-%s' % (self.prefix, name)
 65 |                 self.scalar_summary_writer.add_scalar(name, value, global_step=self.batch)
 66 |         self.batch += 1
 67 | 
 68 | class LogROCCallback(object):
 69 |     """save roc graphs periodically in TensorBoard.
 70 |         write TensorBoard event file, holding the roc graph for every epoch
 71 |         logging_dir : str
 72 |         this function can only be executed after 'eval_metric.py', since that function is responsible for the graph creation
 73 |             where the tensorboard file will be created
 74 |         roc_path : list[str]
 75 |             list of paths to future roc's
 76 |         class_names : list[str]
 77 |             list of class names.
 78 |         """
 79 |     def __init__(self, logging_dir=None, prefix='val', roc_path=None, class_names=None):
 80 |         self.prefix = prefix
 81 |         self.roc_path = roc_path
 82 |         self.class_names = class_names
 83 |         try:
 84 |             from tensorboard import SummaryWriter
 85 |             self.summary_writer = SummaryWriter(logging_dir)
 86 |         except ImportError:
 87 |             logging.error('You can install tensorboard via `pip install tensorboard`.')
 88 | 
 89 |     def __call__(self, param):
 90 |         """Callback to log ROC graph as an image in TensorBoard."""
 91 |         for class_name in self.class_names:
 92 |             roc = os.path.join(self.roc_path, 'roc_'+class_name+'.png')
 93 |             if not os.path.exists(roc):
 94 |                 continue
 95 |             im = scipy.misc.imread(roc)
 96 |             self.summary_writer.add_image(self.prefix+'_'+class_name, im)
 97 | 
 98 | class LogDetectionsCallback(object):
 99 |     """ TODO complete
100 |     """
101 |     def __init__(self, logging_dir=None, prefix='val', images_path=None,
102 |                  class_names=None, batch_size=None, mean_pixels=None, det_thresh=0.5):
103 | 
104 |         self.logging_dir = logging_dir
105 |         self.prefix = prefix
106 |         if not os.path.exists(images_path):
107 |             os.mkdir(images_path)
108 |         self.images_path = images_path
109 |         self.class_names = class_names
110 |         self.batch_size = batch_size
111 |         self.mean_pixels = mean_pixels
112 |         self.det_thresh = det_thresh
113 |         try:
114 |             from tensorboard import SummaryWriter
115 |             self.summary_writer = SummaryWriter(logging_dir)
116 |         except ImportError:
117 |             logging.error('You can install tensorboard via `pip install tensorboard`.')
118 | 
119 |     def __call__(self, param):
120 |         """Callback to log detections and gt-boxes as an image in TensorBoard."""
121 |         if param.locals is None:
122 |             return
123 | 
124 |         result = []
125 |         pad = param.locals['eval_batch'].pad
126 |         images = param.locals['eval_batch'].data[0][0:self.batch_size-pad].asnumpy()
127 |         labels = param.locals['eval_batch'].label[0][0:self.batch_size - pad].asnumpy()
128 |         outputs = [out[0:out.shape[0] - pad] for out in param.locals['self'].get_outputs()]
129 |         # 'det' variable can be in different positions depending with train/test symbols
130 |         if len(outputs) > 1:
131 |             det_idx = [idx for idx,f in enumerate(param.locals['self'].output_names) if f.startswith('det')][0]
132 |             detections = outputs[det_idx].asnumpy()
133 |         else:
134 |             detections = outputs[0].asnumpy()
135 |         for i in range(detections.shape[0]):
136 |             det = detections[i, :, :]
137 |             det = det[np.where(det[:, 0] >= 0)[0]]
138 |             label = labels[i,:,:]
139 |             label = label[np.where(label[:, 0] >= 0)[0]]
140 |             img = images[i,:,:,:] + np.reshape(self.mean_pixels, (3,1,1))
141 |             img = img.astype(np.uint8)
142 |             img = img.transpose([1,2,0])
143 |             img[:, :, (0, 1, 2)] = img[:, :, (2, 1, 0)]
144 |             self._visualize_detection_and_labels(img, det, label=label,
145 |                                                  classes=self.class_names, thresh=self.det_thresh,
146 |                                                  plt_path=os.path.join(self.images_path, 'image'+str(i)+'.png'))
147 |             # save to tensorboard
148 |             img_det_graph = scipy.misc.imread(os.path.join(self.images_path, 'image'+str(i)+'.png'))
149 |             self.summary_writer.add_image('image'+str(i)+'.png', img_det_graph)
150 |         return result
151 | 
152 |     def _visualize_detection_and_labels(self, img, dets, label, classes=[], thresh=None, plt_path=None):
153 |         """
154 |         visualize detections in one image
155 | 
156 |         Parameters:
157 |         ----------
158 |         img : numpy.array
159 |             image, in bgr format
160 |         dets : numpy.array
161 |             ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...])
162 |             each row is one object
163 |         classes : tuple or list of str
164 |             class names
165 |         thresh : float
166 |             score threshold
167 |         """
168 |         fig = plt.figure()
169 |         plt.imshow(img)
170 |         height = img.shape[0]
171 |         width = img.shape[1]
172 |         colors = dict()
173 |         # Visualize ground-truth boxes
174 |         gt_color = (1.0, 0.0, 0.0)
175 |         for i in range(label.shape[0]):
176 |             cls_id = int(label[i, 0])
177 |             if cls_id >= 0:
178 |                 xmin = int(label[i, 1] * width)
179 |                 ymin = int(label[i, 2] * height)
180 |                 xmax = int(label[i, 3] * width)
181 |                 ymax = int(label[i, 4] * height)
182 |                 rect = plt.Rectangle((xmin, ymin), xmax - xmin,
183 |                                      ymax - ymin, fill=False,
184 |                                      edgecolor=gt_color,
185 |                                      linewidth=2)
186 |                 plt.gca().add_patch(rect)
187 |                 class_name = str(cls_id)
188 |                 if classes and len(classes) > cls_id:
189 |                     class_name = classes[cls_id]
190 |                 plt.gca().text(xmin, ymin - 2,
191 |                                'gt',
192 |                                bbox=dict(facecolor=gt_color, alpha=0.5),
193 |                                fontsize=8, color='white')
194 |         # visualize predictions
195 |         for i in range(dets.shape[0]):
196 |             cls_id = int(dets[i, 0])
197 |             if cls_id >= 0:
198 |                 score = dets[i, 1]
199 |                 if score > thresh:
200 |                     if cls_id not in colors:
201 |                         colors[cls_id] = (random.random(), random.random(), random.random())
202 |                     xmin = int(dets[i, 2] * width)
203 |                     ymin = int(dets[i, 3] * height)
204 |                     xmax = int(dets[i, 4] * width)
205 |                     ymax = int(dets[i, 5] * height)
206 |                     rect = plt.Rectangle((xmin, ymin), xmax - xmin,
207 |                                          ymax - ymin, fill=False,
208 |                                          edgecolor=colors[cls_id],
209 |                                          linewidth=3.5)
210 |                     plt.gca().add_patch(rect)
211 |                     class_name = str(cls_id)
212 |                     if classes and len(classes) > cls_id:
213 |                         class_name = classes[cls_id]
214 |                     plt.gca().text(xmin, ymin - 2,
215 |                                     '{:s} {:.3f}'.format(class_name, score),
216 |                                     bbox=dict(facecolor=colors[cls_id], alpha=0.5),
217 |                                     fontsize=8, color='white')
218 |         plt.savefig(plt_path)
219 |         plt.close(fig)
220 | 
221 | 
222 | 
class LogDistributionsCallback(object):
    """
    This function has been deprecated because it consumes too much time.
    The faster way is to use "ParseLogCallback" with a 'iter_monitor' flag

    Log the distributions of the network parameters periodically in
    TensorBoard.  This callback works almost same as `callback.Speedometer`,
    but writes a TensorBoard event file for visualization.

    Parameters
    ----------
    logging_dir : str
        where the tensorboard file will be created
    prefix : str or None
        prepended to every histogram name as '<prefix>-<name>'
    layers_list : list[str] or None
        list of layers to be tracked.  Selecting individual layers is not
        implemented yet: None disables logging, any other value logs every
        entry of 'arg_params'.
    """
    def __init__(self, logging_dir, prefix=None, layers_list=None):
        self.prefix = prefix
        self.layers_list = layers_list
        # Stays None when tensorboard is missing so __call__ can bail out
        # cleanly instead of failing with an AttributeError.
        self.summary_writer = None
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log layers' distributions in TensorBoard."""
        if param.locals is None:
            return
        # TODO - implement layer to choose from..
        if self.layers_list is None or self.summary_writer is None:
            return
        # items() (unlike iteritems()) exists on both Python 2 and Python 3
        for name, value in param.locals['arg_params'].items():
            if self.prefix is not None:
                name = '%s-%s' % (self.prefix, name)
            self.summary_writer.add_histogram(name, value.asnumpy().flatten())


--------------------------------------------------------------------------------
/dataset/iterator.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import numpy as np
  3 | import cv2
  4 | from tools.rand_sampler import RandSampler
  5 | 
  6 | class DetRecordIter(mx.io.DataIter):
  7 |     """
  8 |     The new detection iterator wrapper for mx.io.ImageDetRecordIter which is
  9 |     written in C++, it takes record file as input and runs faster.
 10 |     Supports various augment operations for object detection.
 11 | 
 12 |     Parameters:
 13 |     -----------
 14 |     path_imgrec : str
 15 |         path to the record file
 16 |     path_imglist : str
 17 |         path to the list file to replace the labels in record
 18 |     batch_size : int
 19 |         batch size
 20 |     data_shape : tuple
 21 |         (3, height, width)
 22 |     label_width : int
 23 |         specify the label width, use -1 for variable length
 24 |     label_pad_width : int
 25 |         labels must have same shape in batches, use -1 for automatic estimation
 26 |         in each record, otherwise force padding to width in case you want t
 27 |         rain/validation to match the same width
 28 |     label_pad_value : float
 29 |         label padding value
 30 |     resize_mode : str
 31 |         force - resize to data_shape regardless of aspect ratio
 32 |         fit - try fit to data_shape preserving aspect ratio
 33 |         shrink - shrink to data_shape only, preserving aspect ratio
 34 |     mean_pixels : list or tuple
 35 |         mean values for red/green/blue
 36 |     kwargs : dict
 37 |         see mx.io.ImageDetRecordIter
 38 | 
 39 |     Returns:
 40 |     ----------
 41 | 
 42 |     """
 43 |     def __init__(self, path_imgrec, batch_size, data_shape, path_imglist="",
 44 |                  label_width=-1, label_pad_width=-1, label_pad_value=-1,
 45 |                  resize_mode='force',  mean_pixels=[123.68, 116.779, 103.939],
 46 |                  **kwargs):
 47 |         super(DetRecordIter, self).__init__()
 48 |         self.rec = mx.io.ImageDetRecordIter(
 49 |             path_imgrec     = path_imgrec,
 50 |             path_imglist    = path_imglist,
 51 |             label_width     = label_width,
 52 |             label_pad_width = label_pad_width,
 53 |             label_pad_value = label_pad_value,
 54 |             batch_size      = batch_size,
 55 |             data_shape      = data_shape,
 56 |             mean_r          = mean_pixels[0],
 57 |             mean_g          = mean_pixels[1],
 58 |             mean_b          = mean_pixels[2],
 59 |             resize_mode     = resize_mode,
 60 |             **kwargs)
 61 | 
 62 |         self.provide_label = None
 63 |         self._get_batch()
 64 |         if not self.provide_label:
 65 |             raise RuntimeError("Invalid ImageDetRecordIter: " + path_imgrec)
 66 |         self.reset()
 67 | 
 68 |     @property
 69 |     def provide_data(self):
 70 |         return self.rec.provide_data
 71 | 
 72 |     def reset(self):
 73 |         self.rec.reset()
 74 | 
 75 |     def iter_next(self):
 76 |         return self._get_batch()
 77 | 
 78 |     def next(self):
 79 |         if self.iter_next():
 80 |             return self._batch
 81 |         else:
 82 |             raise StopIteration
 83 | 
 84 |     def _get_batch(self):
 85 |         self._batch = self.rec.next()
 86 |         if not self._batch:
 87 |             return False
 88 | 
 89 |         if self.provide_label is None:
 90 |             # estimate the label shape for the first batch, always reshape to n*5
 91 |             first_label = self._batch.label[0][0].asnumpy()
 92 |             self.batch_size = self._batch.label[0].shape[0]
 93 |             self.label_header_width = int(first_label[4])
 94 |             self.label_object_width = int(first_label[5])
 95 |             assert self.label_object_width >= 5, "object width must >=5"
 96 |             self.label_start = 4 + self.label_header_width
 97 |             self.max_objects = (first_label.size - self.label_start) // self.label_object_width
 98 |             self.label_shape = (self.batch_size, self.max_objects, self.label_object_width)
 99 |             self.label_end = self.label_start + self.max_objects * self.label_object_width
100 |             self.provide_label = [('label', self.label_shape)]
101 | 
102 |         # modify label
103 |         label = self._batch.label[0].asnumpy()
104 |         label = label[:, self.label_start:self.label_end].reshape(
105 |             (self.batch_size, self.max_objects, self.label_object_width))
106 |         self._batch.label = [mx.nd.array(label)]
107 |         return True
108 | 
class DetIter(mx.io.DataIter):
    """
    Detection Iterator, which will feed data and label to network
    Optional data augmentation is performed when providing batch

    Parameters:
    ----------
    imdb : Imdb
        image database
    batch_size : int
        batch size
    data_shape : int or (int, int)
        image shape to be resized
    mean_pixels : float or float list
        [R, G, B], mean pixel values
    rand_samplers : list
        random cropping sampler list, if not specified, will
        use original image only
    rand_mirror : bool
        whether to randomly mirror input images, default False
    shuffle : bool
        whether to shuffle the image list on every reset, default False
    rand_seed : int or None
        fixed seed for numpy's global random generator, default None
        (no seeding); 0 is a valid seed
    max_crop_trial : int
        if random crop is enabled, defines the maximum trial time
        if trial exceed this number, will give up cropping
        (stored on the instance; not read by this class itself)
    is_train : bool
        whether in training phase, default True, if False, labels might
        be ignored
    """
    def __init__(self, imdb, batch_size, data_shape,
                 mean_pixels=[128, 128, 128], rand_samplers=[],
                 rand_mirror=False, shuffle=False, rand_seed=None,
                 is_train=True, max_crop_trial=50):
        super(DetIter, self).__init__()

        self._imdb = imdb
        self.batch_size = batch_size
        if isinstance(data_shape, int):
            data_shape = (data_shape, data_shape)
        self._data_shape = data_shape
        # shaped (3, 1, 1) so it broadcasts over a (3, H, W) image
        self._mean_pixels = mx.nd.array(mean_pixels).reshape((3,1,1))
        if not rand_samplers:
            self._rand_samplers = []
        else:
            if not isinstance(rand_samplers, list):
                rand_samplers = [rand_samplers]
            assert isinstance(rand_samplers[0], RandSampler), "Invalid rand sampler"
            self._rand_samplers = rand_samplers
        self.is_train = is_train
        self._rand_mirror = rand_mirror
        self._shuffle = shuffle
        # `is not None` instead of truthiness so that a seed of 0 is honoured
        if rand_seed is not None:
            np.random.seed(rand_seed) # fix random seed
        self._max_crop_trial = max_crop_trial

        self._current = 0
        self._size = imdb.num_images
        self._index = np.arange(self._size)

        # load a first batch so provide_data / provide_label know the shapes
        self._data = None
        self._label = None
        self._get_batch()

    @property
    def provide_data(self):
        """List of (name, shape) pairs describing the current data batch."""
        return [(k, v.shape) for k, v in self._data.items()]

    @property
    def provide_label(self):
        """List of (name, shape) pairs for the labels, empty when not training."""
        if self.is_train:
            return [(k, v.shape) for k, v in self._label.items()]
        else:
            return []

    def reset(self):
        """Restart from the first image, reshuffling the order if requested."""
        self._current = 0
        if self._shuffle:
            np.random.shuffle(self._index)

    def iter_next(self):
        """Return True while there are images left in the current pass."""
        return self._current < self._size

    def next(self):
        """Return the next mx.io.DataBatch or raise StopIteration at the end."""
        if self.iter_next():
            self._get_batch()
            data_batch = mx.io.DataBatch(data=list(self._data.values()),
                                   label=list(self._label.values()),
                                   pad=self.getpad(), index=self.getindex())
            self._current += self.batch_size
            return data_batch
        else:
            raise StopIteration

    def getindex(self):
        """Index of the current batch within the pass."""
        return self._current // self.batch_size

    def getpad(self):
        """Number of slots of the current batch that lie past the last image."""
        pad = self._current + self.batch_size - self._size
        return 0 if pad < 0 else pad

    def _get_batch(self):
        """
        Load data/label from dataset
        """
        batch_data = mx.nd.zeros((self.batch_size, 3, self._data_shape[0], self._data_shape[1]))
        batch_label = []
        for i in range(self.batch_size):
            if (self._current + i) >= self._size:
                if not self.is_train:
                    # outside training the surplus slots simply stay zero
                    continue
                # use padding from middle in each epoch
                idx = (self._current + i + self._size // 2) % self._size
                index = self._index[idx]
            else:
                index = self._index[self._current + i]
            # index = self.debug_index
            im_path = self._imdb.image_path_from_index(index)
            with open(im_path, 'rb') as fp:
                img_content = fp.read()
            img = mx.img.imdecode(img_content)
            # copy the label because augmentation modifies it in place
            gt = self._imdb.label_from_index(index).copy() if self.is_train else None
            data, label = self._data_augmentation(img, gt)
            batch_data[i] = data
            if self.is_train:
                batch_label.append(label)
        self._data = {'data': batch_data}
        if self.is_train:
            self._label = {'label': mx.nd.array(np.array(batch_label))}
        else:
            self._label = {'label': None}

    def _data_augmentation(self, data, label):
        """
        perform data augmentations: crop, mirror, resize, sub mean, swap channels...

        Takes the decoded image (height, width, channel) and its label and
        returns the augmented (channel, height, width) float32 image with the
        mean subtracted, together with the matching label.
        """
        if self.is_train and self._rand_samplers:
            rand_crops = []
            for rs in self._rand_samplers:
                rand_crops += rs.sample(label)
            num_rand_crops = len(rand_crops)
            # randomly pick up one as input data
            if num_rand_crops > 0:
                index = int(np.random.uniform(0, 1) * num_rand_crops)
                width = data.shape[1]
                height = data.shape[0]
                # crop box is given relative to the image size
                crop = rand_crops[index][0]
                xmin = int(crop[0] * width)
                ymin = int(crop[1] * height)
                xmax = int(crop[2] * width)
                ymax = int(crop[3] * height)
                if xmin >= 0 and ymin >= 0 and xmax <= width and ymax <= height:
                    data = mx.img.fixed_crop(data, xmin, ymin, xmax-xmin, ymax-ymin)
                else:
                    # padding mode: the box reaches outside the image, so paste
                    # the image onto a canvas filled with the value 128
                    new_width = xmax - xmin
                    new_height = ymax - ymin
                    offset_x = 0 - xmin
                    offset_y = 0 - ymin
                    data_bak = data
                    data = mx.nd.full((new_height, new_width, 3), 128, dtype='uint8')
                    data[offset_y:offset_y+height, offset_x:offset_x + width, :] = data_bak
                label = rand_crops[index][1]
        if self.is_train:
            # training picks a random interpolation method for the resize
            interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                              cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
        else:
            interp_methods = [cv2.INTER_LINEAR]
        interp_method = interp_methods[int(np.random.uniform(0, 1) * len(interp_methods))]
        data = mx.img.imresize(data, self._data_shape[1], self._data_shape[0], interp_method)
        if self.is_train and self._rand_mirror:
            if np.random.uniform(0, 1) > 0.5:
                data = mx.nd.flip(data, axis=1)
                # mirror the x coordinates of the valid (class id > -1) boxes;
                # xmin and xmax swap roles
                valid_mask = np.where(label[:, 0] > -1)[0]
                tmp = 1.0 - label[valid_mask, 1]
                label[valid_mask, 1] = 1.0 - label[valid_mask, 3]
                label[valid_mask, 3] = tmp
        data = mx.nd.transpose(data, (2,0,1))
        data = data.astype('float32')
        data = data - self._mean_pixels
        return data, label
291 | 


--------------------------------------------------------------------------------