├── __init__.py ├── config ├── __init__.py ├── config.py └── utils.py ├── detect ├── __init__.py └── detector.py ├── symbol ├── __init__.py ├── README.md ├── vgg16_reduced.py ├── mobilenet.py ├── symbol_builder.py ├── resnet.py ├── densenet.py ├── symbol_factory.py ├── legacy_vgg16_ssd_300.py ├── inceptionv3.py └── legacy_vgg16_ssd_512.py ├── tools ├── __init__.py ├── caffe_converter │ ├── caffe_parse │ │ ├── __init__.py │ │ └── parse_from_protobuf.py │ ├── make_win32.bat │ ├── Makefile │ ├── run.sh │ ├── mean_image.py │ ├── README.md │ └── convert_model.py ├── prepare_pascal.sh ├── prepare_coco.sh ├── find_mxnet.py ├── visualize_net.py ├── image_processing.py ├── prepare_dataset.py └── rand_sampler.py ├── train ├── __init__.py └── metric.py ├── dataset ├── __init__.py ├── pycocotools │ ├── __init__.py │ └── README.md ├── names │ ├── pascal_voc.names │ └── mscoco.names ├── testdb.py ├── imdb.py ├── concat_db.py ├── mscoco.py ├── yolo_format.py └── iterator.py ├── evaluate ├── __init__.py ├── evaluate_net.py ├── eval_voc.py └── custom_callbacks.py ├── scripts ├── __init__.py ├── train_script.sh ├── run_ssd_docker.sh └── run_tensorboard.sh ├── data └── demo │ ├── dog.jpg │ ├── eagle.jpg │ ├── 000001.jpg │ ├── 000002.jpg │ ├── 000003.jpg │ ├── 000004.jpg │ ├── 000006.jpg │ ├── 000008.jpg │ ├── 000010.jpg │ ├── 000022.jpg │ ├── horses.jpg │ ├── person.jpg │ └── street.jpg ├── .gitmodules ├── model └── README.md ├── docker ├── mxnet_0.12 │ └── Dockerfile ├── mxnet_0_11 │ └── Dockerfile ├── cudnn5.1 │ └── Dockerfile └── cudnn6.0 │ └── Dockerfile ├── LICENSE ├── deploy.py ├── .gitignore ├── evaluate.py ├── demo.py └── train.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /detect/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /symbol/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/caffe_converter/caffe_parse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /data/demo/dog.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/dog.jpg -------------------------------------------------------------------------------- /data/demo/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/eagle.jpg -------------------------------------------------------------------------------- /data/demo/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000001.jpg -------------------------------------------------------------------------------- /data/demo/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000002.jpg -------------------------------------------------------------------------------- /data/demo/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000003.jpg -------------------------------------------------------------------------------- /data/demo/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000004.jpg -------------------------------------------------------------------------------- /data/demo/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000006.jpg -------------------------------------------------------------------------------- /data/demo/000008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000008.jpg 
-------------------------------------------------------------------------------- /data/demo/000010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000010.jpg -------------------------------------------------------------------------------- /data/demo/000022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/000022.jpg -------------------------------------------------------------------------------- /data/demo/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/horses.jpg -------------------------------------------------------------------------------- /data/demo/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/person.jpg -------------------------------------------------------------------------------- /data/demo/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhreshold/mxnet-ssd/HEAD/data/demo/street.jpg -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "mxnet"] 2 | path = mxnet 3 | url = https://github.com/zhreshold/mxnet.git 4 | -------------------------------------------------------------------------------- /model/README.md: -------------------------------------------------------------------------------- 1 | #### This is the default directory to store all the models, including `*.params` and `*.json` 2 | -------------------------------------------------------------------------------- /tools/caffe_converter/make_win32.bat: 
-------------------------------------------------------------------------------- 1 | @protoc --python_out=./ ./caffe_parse/caffe.proto 2 | @echo done. 3 | @pause 4 | -------------------------------------------------------------------------------- /dataset/pycocotools/README.md: -------------------------------------------------------------------------------- 1 | This is a modified version of https://github.com/pdollar/coco python API. 2 | No `make` is required, but this will not support mask functions. 3 | -------------------------------------------------------------------------------- /dataset/names/pascal_voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /docker/mxnet_0.12/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mxnet/python:gpu_0.12.0 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | nano \ 5 | wget \ 6 | graphviz \ 7 | python-tk 8 | 9 | 10 | RUN pip install ipython jupyter matplotlib scipy graphviz tensorboard future 11 | 12 | WORKDIR /mxnet/example/ssd 13 | -------------------------------------------------------------------------------- /tools/prepare_pascal.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | python $DIR/prepare_dataset.py --dataset pascal --year 2007,2012 --set trainval --target $DIR/../data/train.lst 4 | python $DIR/prepare_dataset.py --dataset pascal --year 2007 --set test --target $DIR/../data/val.lst --shuffle False 5 | -------------------------------------------------------------------------------- 
/docker/mxnet_0_11/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM mxnet/python:gpu_0.11.0 3 | 4 | RUN apt-get update && apt-get install -y \ 5 | nano \ 6 | wget \ 7 | graphviz \ 8 | python-tk 9 | 10 | 11 | RUN pip install ipython jupyter matplotlib scipy graphviz tensorboard future 12 | 13 | WORKDIR /mxnet/example/ssd 14 | -------------------------------------------------------------------------------- /tools/prepare_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | python $DIR/prepare_dataset.py --dataset coco --set train2014,valminusminival2014 --target $DIR/../data/train.lst --root $DIR/../data/coco 4 | python $DIR/prepare_dataset.py --dataset coco --set minival2014 --target $DIR/../data/val.lst --shuffle False --root $DIR/../data/coco 5 | -------------------------------------------------------------------------------- /tools/caffe_converter/Makefile: -------------------------------------------------------------------------------- 1 | # find protoc 2 | ifndef PROTOC 3 | DEPS_PROTOC=../../deps/bin/protoc 4 | ifneq ("$(wildcard $(DEPS_PROTOC))","") 5 | PROTOC = $(DEPS_PROTOC) 6 | else 7 | PROTOC = protoc 8 | endif 9 | endif 10 | 11 | all: caffe_parse/caffe_pb2.py 12 | 13 | clean: 14 | rm caffe_parse/caffe_pb2.py* 15 | 16 | caffe_parse/caffe_pb2.py: 17 | $(PROTOC) --python_out=./ ./caffe_parse/caffe.proto 18 | -------------------------------------------------------------------------------- /tools/find_mxnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | try: 4 | if os.environ.get('MXNET_EXAMPLE_SSD_DISABLE_PRE_INSTALLED', 0): 5 | raise ImportError 6 | import mxnet as mx 7 | print("Using mxnet as:") 8 | print(mx) 9 | print("Warning: using pre-installed version of mxnet may cause 
def parse_caffemodel(file_path):
    """
    parses the trained .caffemodel file

    file_path: /path/to/trained-model.caffemodel

    returns: layers (the populated repeated layer field of the parsed
    NetParameter, as selected by find_layers)
    """
    # Read through a context manager so the handle is released even if
    # reading fails; the file is opened in binary mode and its bytes are
    # handed to ParseFromString below.
    with open(file_path, 'rb') as f:
        contents = f.read()

    net_param = caffe_pb2.NetParameter()
    net_param.ParseFromString(contents)

    return find_layers(net_param)


def find_layers(net_param):
    """
    returns the non-empty layer container of net_param

    `layers` is checked first, then `layer`.

    raises: Exception when both fields are empty
    """
    if len(net_param.layers) > 0:
        return net_param.layers
    elif len(net_param.layer) > 0:
        return net_param.layer
    else:
        raise Exception("Couldn't find layers")


def main():
    """Smoke-test entry point: parse a placeholder model path."""
    parse_caffemodel('xxx.caffemodel')


if __name__ == '__main__':
    main()
Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /scripts/run_ssd_docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | nvidia-docker run -it --rm \ 4 | -e MXNET_CUDNN_AUTOTUNE_DEFAULT=0 \ 5 | -v /home/oper/Datasets:/mxnet/example/ssd/data \ 6 | -v /home/oper/david/mxnet-ssd/model:/mxnet/example/ssd/model \ 7 | -v /home/oper/david/mxnet-ssd/config:/mxnet/example/ssd/config \ 8 | -v /home/oper/david/mxnet-ssd/output:/mxnet/example/ssd/output \ 9 | -v /home/oper/david/mxnet-ssd/dataset:/mxnet/example/ssd/dataset \ 10 | -v /home/oper/david/mxnet-ssd/train:/mxnet/example/ssd/train \ 11 | -v /home/oper/david/mxnet-ssd/tools:/mxnet/example/ssd/tools \ 12 | -v /home/oper/david/mxnet-ssd/symbol:/mxnet/example/ssd/symbol \ 13 | -v /home/oper/david/mxnet-ssd/detect:/mxnet/example/ssd/detect \ 14 | -v /home/oper/david/mxnet-ssd/evaluate:/mxnet/example/ssd/evaluate \ 15 | -v /home/oper/david/mxnet-ssd/scripts:/mxnet/example/ssd/scripts \ 16 | -v /home/oper/david/mxnet-ssd/deploy.py:/mxnet/example/ssd/deploy.py \ 17 | -v /home/oper/david/mxnet-ssd/evaluate.py:/mxnet/example/ssd/evaluate.py \ 18 | -v /home/oper/david/mxnet-ssd/train.py:/mxnet/example/ssd/train.py \ 19 | -v /home/oper/david/mxnet-ssd/demo.py:/mxnet/example/ssd/demo.py \ 20 | mxnet/ssd:gpu_0.12.0_cuda9 21 | -------------------------------------------------------------------------------- /scripts/run_tensorboard.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | nvidia-docker run -it --rm -p 0.0.0.0:6006:6006 \ 4 | -e MXNET_CUDNN_AUTOTUNE_DEFAULT=0 \ 5 | -v /home/oper/Datasets:/mxnet/example/ssd/data \ 6 | -v /home/oper/david/mxnet-ssd/model:/mxnet/example/ssd/model \ 7 | -v /home/oper/david/mxnet-ssd/config:/mxnet/example/ssd/config \ 8 | -v /home/oper/david/mxnet-ssd/output:/mxnet/example/ssd/output \ 9 | -v 
/home/oper/david/mxnet-ssd/dataset:/mxnet/example/ssd/dataset \ 10 | -v /home/oper/david/mxnet-ssd/train:/mxnet/example/ssd/train \ 11 | -v /home/oper/david/mxnet-ssd/tools:/mxnet/example/ssd/tools \ 12 | -v /home/oper/david/mxnet-ssd/symbol:/mxnet/example/ssd/symbol \ 13 | -v /home/oper/david/mxnet-ssd/detect:/mxnet/example/ssd/detect \ 14 | -v /home/oper/david/mxnet-ssd/evaluate:/mxnet/example/ssd/evaluate \ 15 | -v /home/oper/david/mxnet-ssd/scripts:/mxnet/example/ssd/scripts \ 16 | -v /home/oper/david/mxnet-ssd/deploy.py:/mxnet/example/ssd/deploy.py \ 17 | -v /home/oper/david/mxnet-ssd/evaluate.py:/mxnet/example/ssd/evaluate.py \ 18 | -v /home/oper/david/mxnet-ssd/train.py:/mxnet/example/ssd/train.py \ 19 | -v /home/oper/david/mxnet-ssd/demo.py:/mxnet/example/ssd/demo.py \ 20 | mxnet/ssd:gpu_0.12.0_cuda9 21 | -------------------------------------------------------------------------------- /tools/caffe_converter/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [[ $# -ne 1 ]]; then 3 | echo "usage: $0 model_name" 4 | echo " model_name: [vgg16|vgg19], ..." 5 | exit -1 6 | fi 7 | 8 | if [[ $1 == "vgg19" ]]; then 9 | if [[ ! -f VGG_ILSVRC_19_layers_deploy.prototxt ]]; then 10 | wget -c https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt 11 | fi 12 | 13 | if [[ ! -f VGG_ILSVRC_19_layers.caffemodel ]]; then 14 | wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel 15 | fi 16 | 17 | echo "converting" 18 | python `dirname $0`/convert_model.py VGG_ILSVRC_19_layers_deploy.prototxt VGG_ILSVRC_19_layers.caffemodel vgg19 19 | elif [[ $1 == "vgg16" ]]; then 20 | if [[ ! 
-f VGG_ILSVRC_16_layers_deploy.prototxt ]]; then 21 | wget -c https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/c3ba00e272d9f48594acef1f67e5fd12aff7a806/VGG_ILSVRC_16_layers_deploy.prototxt 22 | fi 23 | 24 | if [[ ! -f VGG_ILSVRC_16_layers.caffemodel ]]; then 25 | wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel 26 | fi 27 | 28 | echo "converting" 29 | python `dirname $0`/convert_model.py VGG_ILSVRC_16_layers_deploy.prototxt VGG_ILSVRC_16_layers.caffemodel vgg16 30 | else 31 | echo "unsupported model: $1" 32 | fi 33 | -------------------------------------------------------------------------------- /docker/cudnn5.1/Dockerfile: -------------------------------------------------------------------------------- 1 | # Start with cuDNN base image 2 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 3 | MAINTAINER DavidSolomon 4 | 5 | # Install git, wget and other dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | nano \ 8 | git \ 9 | libopenblas-dev \ 10 | libopencv-dev \ 11 | python-dev \ 12 | python-numpy \ 13 | python-setuptools \ 14 | python-opencv \ 15 | python-matplotlib \ 16 | python-tk \ 17 | wget \ 18 | graphviz 19 | 20 | # Clone MXNet repo and move into it 21 | RUN cd /root && git clone --recursive https://github.com/zhreshold/mxnet-ssd.git && cd mxnet-ssd/mxnet && \ 22 | # Copy config.mk 23 | cp make/config.mk config.mk && \ 24 | # Set OpenBLAS 25 | sed -i 's/USE_BLAS = atlas/USE_BLAS = openblas/g' config.mk && \ 26 | # Set CUDA flag 27 | sed -i 's/USE_CUDA = 0/USE_CUDA = 1/g' config.mk && \ 28 | sed -i 's/USE_CUDA_PATH = NONE/USE_CUDA_PATH = \/usr\/local\/cuda/g' config.mk && \ 29 | # Set cuDNN flag 30 | sed -i 's/USE_CUDNN = 0/USE_CUDNN = 1/g' config.mk && \ 31 | # Make 32 | make -j $(nproc) 33 | 34 | # Install Python package 35 | RUN cd /root/mxnet-ssd/mxnet/python && python setup.py install 36 | 37 | # Add to Python path 38 | RUN echo "export 
PYTHONPATH=$/root/mxnet-ssd/mxnet/python:$PYTHONPATH" >> /root/.bashrc 39 | 40 | # Install pip 41 | RUN easy_install -U pip 42 | 43 | # Install graphviz and jupyter 44 | RUN pip install graphviz jupyter ipython matplotlib tensorboard future scipy 45 | 46 | # Set ~/mxnet as working directory 47 | WORKDIR /root/mxnet-ssd 48 | 49 | # TODO add tensorboard code change to the docker... 50 | # the installation was /usr/local/lib/python2.7/dist-packages/tensorboard/summary:186 51 | 52 | -------------------------------------------------------------------------------- /docker/cudnn6.0/Dockerfile: -------------------------------------------------------------------------------- 1 | # Start with cuDNN base image 2 | FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04 3 | MAINTAINER DavidSolomon 4 | 5 | # Install git, wget and other dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | nano \ 8 | git \ 9 | libopenblas-dev \ 10 | libopencv-dev \ 11 | python-dev \ 12 | python-numpy \ 13 | python-setuptools \ 14 | python-opencv \ 15 | python-matplotlib \ 16 | python-tk \ 17 | wget \ 18 | graphviz 19 | 20 | # Clone MXNet repo and move into it 21 | RUN cd /root && git clone --recursive https://github.com/zhreshold/mxnet-ssd.git && cd mxnet-ssd/mxnet && \ 22 | # Copy config.mk 23 | cp make/config.mk config.mk && \ 24 | # Set OpenBLAS 25 | sed -i 's/USE_BLAS = atlas/USE_BLAS = openblas/g' config.mk && \ 26 | # Set CUDA flag 27 | sed -i 's/USE_CUDA = 0/USE_CUDA = 1/g' config.mk && \ 28 | sed -i 's/USE_CUDA_PATH = NONE/USE_CUDA_PATH = \/usr\/local\/cuda/g' config.mk && \ 29 | # Set cuDNN flag 30 | sed -i 's/USE_CUDNN = 0/USE_CUDNN = 1/g' config.mk && \ 31 | # Make 32 | make -j $(nproc) 33 | 34 | # Install Python package 35 | RUN cd /root/mxnet-ssd/mxnet/python && python setup.py install 36 | 37 | # Add to Python path 38 | RUN echo "export PYTHONPATH=$/root/mxnet-ssd/mxnet/python:$PYTHONPATH" >> /root/.bashrc 39 | 40 | # Install pip 41 | RUN easy_install -U pip 42 | 43 | # Install 
def protoBlobFileToND(proto_file):
    """Load a Caffe mean-image BlobProto file and return it as an mx.nd array.

    Parameters
    ----------
    proto_file : str
        path to the serialized BlobProto file

    Returns
    -------
    mx.nd.NDArray built from the blob data reshaped to
    (channels, height, width), with channel 0 and channel 2 exchanged.
    """
    # ParseFromString consumes the serialized bytes, so the file must be
    # read in binary mode; `with` also closes the handle on error.
    with open(proto_file, "rb") as blob_file:
        data = blob_file.read()

    # Both import paths at the top of this file bind the name `caffe_pb2`
    # (from caffe.proto or from the bundled caffe_parse package), so use it
    # directly. The previous fallback referenced `caffe_parse`, which is
    # never bound and raised NameError.
    mean_blob = caffe_pb2.BlobProto()
    mean_blob.ParseFromString(data)

    img_mean_np = np.array(mean_blob.data)
    img_mean_np = img_mean_np.reshape(
        mean_blob.channels, mean_blob.height, mean_blob.width
    )
    # swap channels from Caffe BGR to RGB
    # The index-array read on the right-hand side yields a copy, so the two
    # channels are truly exchanged. The previous code swapped through an
    # alias of the same array and left both channels holding channel 2.
    img_mean_np[[0, 2]] = img_mean_np[[2, 0]]
    return mx.nd.array(img_mean_np)
class TestDB(Imdb):
    """
    A simple wrapper class for converting list of image to Imdb during testing

    Parameters:
    ----------
    images : str or list of str
        image path or list of images; when the entries are not complete
        paths, root_dir and/or extension supply the missing parts
    root_dir : str or None
        directory of input images, optional if image path already
        has full directory information
    extension : str or None
        image extension, eg. ".jpg", optional
    """
    def __init__(self, images, root_dir=None, extension=None):
        # a single path is wrapped so the rest of the class always sees a list
        if not isinstance(images, list):
            images = [images]
        num_images = len(images)
        super(TestDB, self).__init__("test" + str(num_images))
        self.image_set_index = images
        self.num_images = num_images
        # falsy values (e.g. empty string) are normalised to None
        self.root_dir = root_dir if root_dir else None
        self.extension = extension if extension else None

    def image_path_from_index(self, index):
        """
        given image index, return full path

        Parameters:
        ----------
        index: int
            index of a specific image
        Returns
        ----------
        path of this image (extension appended and root_dir prepended
        when they are set); asserts that the path exists
        """
        name = self.image_set_index[index]
        if self.extension:
            name += self.extension
        if self.root_dir:
            name = os.path.join(self.root_dir, name)
        assert os.path.exists(name), 'Path does not exist: {}'.format(name)
        return name

    def label_from_index(self, index):
        """
        labels are not available for a test-only database

        Raises
        ----------
        RuntimeError, always
        """
        # The exception used to be *returned*, so callers silently received
        # an exception object as if it were a label.
        raise RuntimeError("Testdb does not support label loading")
default=300, 19 | help='data shape') 20 | parser.add_argument('--num-class', dest='num_classes', help='number of classes', 21 | default=20, type=int) 22 | parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.5, 23 | help='non-maximum suppression threshold, default 0.5') 24 | parser.add_argument('--force', dest='force_nms', type=bool, default=True, 25 | help='force non-maximum suppression on different class') 26 | parser.add_argument('--topk', dest='nms_topk', type=int, default=400, 27 | help='apply nms only to top k detections based on scores.') 28 | args = parser.parse_args() 29 | return args 30 | 31 | if __name__ == '__main__': 32 | args = parse_args() 33 | net = get_symbol(args.network, args.data_shape, 34 | num_classes=args.num_classes, nms_thresh=args.nms_thresh, 35 | force_suppress=args.force_nms, nms_topk=args.nms_topk) 36 | if args.prefix.endswith('_'): 37 | prefix = args.prefix + args.network + '_' + str(args.data_shape) 38 | else: 39 | prefix = args.prefix 40 | _, arg_params, aux_params = mx.model.load_checkpoint(prefix, args.epoch) 41 | # new name 42 | tmp = prefix.rsplit('/', 1) 43 | save_prefix = '/deploy_'.join(tmp) 44 | mx.model.save_checkpoint(save_prefix, args.epoch, net, arg_params, aux_params) 45 | print("Saved model: {}-{:04d}.params".format(save_prefix, args.epoch)) 46 | print("Saved symbol: {}-symbol.json".format(save_prefix)) 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User defined 2 | data/* 3 | !data/demo/ 4 | model/* 5 | !model/README.md 6 | cache/* 7 | tools/caffe_converter/*.params 8 | tools/caffe_converter/*.json 9 | tools/caffe_converter/*.prototxt 10 | tools/caffe_converter/*.caffemodel 11 | .DS_Store 12 | 13 | # Compiled Object files 14 | *.slo 15 | *.lo 16 | *.o 17 | *.obj 18 | *.d 19 | 20 | # Precompiled Headers 21 | *.gch 22 | *.pch 23 | 24 | # Compiled Dynamic libraries 25 
| *.so 26 | *.dylib 27 | *.dll 28 | 29 | # Fortran module files 30 | *.mod 31 | 32 | # Compiled Static libraries 33 | *.lai 34 | *.la 35 | *.a 36 | *.lib 37 | 38 | # Executables 39 | *.exe 40 | *.out 41 | *.app 42 | *~ 43 | 44 | *.pyc 45 | .Rhistory 46 | *log 47 | Debug 48 | *suo 49 | tracker 50 | 51 | # vim 52 | *.swp 53 | *.swo 54 | *.swn 55 | .vimrc 56 | .ycm_extra_conf.py 57 | .ycm_extra_conf.pyc 58 | 59 | # Byte-compiled / optimized / DLL files 60 | __pycache__/ 61 | *.py[cod] 62 | *$py.class 63 | 64 | # C extensions 65 | *.so 66 | 67 | # Distribution / packaging 68 | .Python 69 | env/ 70 | build/ 71 | develop-eggs/ 72 | dist/ 73 | downloads/ 74 | eggs/ 75 | .eggs/ 76 | lib/ 77 | lib64/ 78 | parts/ 79 | sdist/ 80 | var/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | 85 | # PyInstaller 86 | # Usually these files are written by a python script from a template 87 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 88 | *.manifest 89 | *.spec 90 | 91 | # Installer logs 92 | pip-log.txt 93 | pip-delete-this-directory.txt 94 | 95 | # Unit test / coverage reports 96 | htmlcov/ 97 | .tox/ 98 | .coverage 99 | .coverage.* 100 | .cache 101 | nosetests.xml 102 | coverage.xml 103 | *,cover 104 | .hypothesis/ 105 | 106 | # Translations 107 | *.mo 108 | *.pot 109 | 110 | # Django stuff: 111 | *.log 112 | local_settings.py 113 | 114 | # Flask stuff: 115 | instance/ 116 | .webassets-cache 117 | 118 | # Scrapy stuff: 119 | .scrapy 120 | 121 | # Sphinx documentation 122 | docs/_build/ 123 | 124 | # PyBuilder 125 | target/ 126 | 127 | # IPython Notebook 128 | .ipynb_checkpoints 129 | 130 | # pyenv 131 | .python-version 132 | 133 | # celery beat schedule file 134 | celerybeat-schedule 135 | 136 | # dotenv 137 | .env 138 | 139 | # virtualenv 140 | venv/ 141 | ENV/ 142 | 143 | # ide project settings 144 | .spyderproject 145 | .idea 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 
class MultiBoxMetric(mx.metric.EvalMetric):
    """Track cross-entropy and smooth-L1 loss during Multibox training """
    def __init__(self, eps=1e-8):
        super(MultiBoxMetric, self).__init__('MultiBox')
        self.eps = eps
        self.num = 2
        self.name = ['CrossEntropy', 'SmoothL1']
        self.reset()

    def reset(self):
        """
        clear the accumulators

        Uses one (sum, count) slot per sub-metric once `num` is set, and
        plain scalars while `num` does not exist yet.
        """
        slots = getattr(self, 'num', None)
        if slots is not None:
            self.num_inst = [0] * slots
            self.sum_metric = [0.0] * slots
            return
        self.num_inst = 0
        self.sum_metric = 0.0

    def update(self, labels, preds):
        """
        accumulate both losses from one batch of network outputs

        Only `preds` is read: preds[0] holds class probabilities (class axis
        is axis 1), preds[1] the localization loss and preds[2] the class
        targets, where negative targets are skipped.
        """
        class_probs = preds[0].asnumpy()
        loc_losses = preds[1].asnumpy()
        targets = preds[2].asnumpy()
        num_classes = class_probs.shape[1]
        num_valid = np.sum(targets >= 0)
        # flatten() copies, so the clamp below never touches `targets`
        flat_targets = targets.flatten()
        # in case you have a 'other' class
        flat_targets[flat_targets >= num_classes] = 0
        keep = np.flatnonzero(flat_targets >= 0)
        class_ids = flat_targets[keep].astype(np.int64)
        # move the class axis last so each row is one prediction
        flat_probs = class_probs.transpose((0, 2, 1)).reshape((-1, num_classes))
        picked = flat_probs[keep, class_ids]
        # cross entropy, eps guards log(0)
        self.sum_metric[0] += (-np.log(picked + self.eps)).sum()
        self.num_inst[0] += num_valid
        # smoothl1loss
        self.sum_metric[1] += np.sum(loc_losses)
        self.num_inst[1] += num_valid

    def get(self):
        """Get the current evaluation result.
        Override the default behavior

        Returns
        -------
        name : str or list of str
            Name(s) of the metric.
        value : float or list of float
            Accumulated sum divided by instance count, nan when the count is 0.
        """
        if self.num is None:
            if self.num_inst == 0:
                return (self.name, float('nan'))
            return (self.name, self.sum_metric / self.num_inst)
        names = ['%s' % (self.name[idx]) for idx in range(self.num)]
        values = []
        for total, count in zip(self.sum_metric, self.num_inst):
            values.append(total / count if count != 0 else float('nan'))
        return (names, values)
Make sure to download the version that corresponds to the version of the bindings. Extract it to any location, then add that location to your `PATH` 22 | 3. Run `make_win32.bat` to build the package 23 | 24 | 25 | ### How to use 26 | To convert SSD caffemodels, use: `python convert_model.py prototxt caffemodel outputprefix` 27 | 28 | Linux: Use `./run.sh model_name` to download and convert a model. E.g. `./run.sh vgg19` 29 | 30 | Windows: Use `python convert_model.py prototxt caffemodel outputprefix` 31 | For example: `python convert_model.py VGG_ILSVRC_16_layers_deploy.prototxt VGG_ILSVRC_16_layers.caffemodel vgg16` 32 | 33 | 34 | ### Note 35 | 36 | * We have verified the results of the VGG_16/VGG_19 and BVLC_googlenet models from the Caffe model zoo. 37 | * The tool only supports single-input, single-output networks. 38 | * The tool can only work with the L2LayerParameter in Caffe. 39 | * Caffe uses a convention for multi-strided pooling output shape that is inconsistent with MXNet's. 40 | * This importer doesn't handle this problem properly yet. 41 | * An example of this failure is importing bvlc_Googlenet. The user needs to add padding to stride-2 pooling to make this work right now.
# Data-augmentation and data-loading configuration.
# Builds `cfg.train` and `cfg.valid`, each flattened to a plain dict by
# config_as_dict at the end of its section.
from __future__ import absolute_import
import os
from .utils import DotDict, namedtuple_with_defaults, zip_namedtuple, config_as_dict

# Random-crop sampler settings; the list holds the default of each field,
# in the same order as the field names.
RandCropper = namedtuple_with_defaults('RandCropper',
    'min_crop_scales, max_crop_scales, \
    min_crop_aspect_ratios, max_crop_aspect_ratios, \
    min_crop_overlaps, max_crop_overlaps, \
    min_crop_sample_coverages, max_crop_sample_coverages, \
    min_crop_object_coverages, max_crop_object_coverages, \
    max_crop_trials',
    [0.0, 1.0,
    0.5, 2.0,
    0.0, 1.0,
    0.0, 1.0,
    0.0, 1.0,
    25])

# Random-padding settings; the default probability of 0.0 disables padding.
RandPadder = namedtuple_with_defaults('RandPadder',
    'rand_pad_prob, max_pad_scale, fill_value',
    [0.0, 1.0, 127])

# Color-jitter settings; every probability defaults to 0.0 (no jitter).
ColorJitter = namedtuple_with_defaults('ColorJitter',
    'random_hue_prob, max_random_hue, \
    random_saturation_prob, max_random_saturation, \
    random_illumination_prob, max_random_illumination, \
    random_contrast_prob, max_random_contrast',
    [0.0, 18,
    0.0, 32,
    0.0, 32,
    0.0, 0.5])


cfg = DotDict()
# repository root: the parent of the directory containing this file
cfg.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# training configs
cfg.train = DotDict()
# random cropping samplers
# five samplers that differ only in the minimum overlap (0.1 to 0.9)
cfg.train.rand_crop_samplers = [
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.1),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.3),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.5),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.7),
    RandCropper(min_crop_scales=0.3, min_crop_overlaps=0.9),]
cfg.train.crop_emit_mode = 'center'
# cfg.train.emit_overlap_thresh = 0.4
# random padding
cfg.train.rand_pad = RandPadder(rand_pad_prob=0.5, max_pad_scale=4.0)
# random color jitter
cfg.train.color_jitter = ColorJitter(random_hue_prob=0.5, random_saturation_prob=0.5,
    random_illumination_prob=0.5, random_contrast_prob=0.5)
cfg.train.inter_method = 10 # random interpolation
cfg.train.rand_mirror_prob = 0.5
cfg.train.shuffle = True
cfg.train.seed = 233
cfg.train.preprocess_threads = 48
# must run last: the sampler, padder and jitter tuples are merged into flat keys
cfg.train = config_as_dict(cfg.train) # convert to normal dict

# validation
# no augmentation: empty sampler list, default (zero-probability) pad and jitter
cfg.valid = DotDict()
cfg.valid.rand_crop_samplers = []
cfg.valid.rand_pad = RandPadder()
cfg.valid.color_jitter = ColorJitter()
cfg.valid.rand_mirror_prob = 0
cfg.valid.shuffle = False
cfg.valid.seed = 0
cfg.valid.preprocess_threads = 32
cfg.valid = config_as_dict(cfg.valid) # convert to normal dict
Add configuration to `symbol_factory.py`, an example would be: 5 | ``` 6 | if network == 'vgg16_reduced': 7 | if data_shape >= 448: 8 | from_layers = ['relu4_3', 'relu7', '', '', '', '', ''] 9 | num_filters = [512, -1, 512, 256, 256, 256, 256] 10 | strides = [-1, -1, 2, 2, 2, 2, 1] 11 | pads = [-1, -1, 1, 1, 1, 1, 1] 12 | sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \ 13 | [.75, .8216], [.9, .9721]] 14 | ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \ 15 | [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]] 16 | normalizations = [20, -1, -1, -1, -1, -1, -1] 17 | steps = [] if data_shape != 512 else [x / 512.0 for x in 18 | [8, 16, 32, 64, 128, 256, 512]] 19 | else: 20 | from_layers = ['relu4_3', 'relu7', '', '', '', ''] 21 | num_filters = [512, -1, 512, 256, 256, 256] 22 | strides = [-1, -1, 2, 2, 1, 1] 23 | pads = [-1, -1, 1, 1, 0, 0] 24 | sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]] 25 | ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \ 26 | [1,2,.5], [1,2,.5]] 27 | normalizations = [20, -1, -1, -1, -1, -1] 28 | steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]] 29 | return locals() 30 | elif network == 'inceptionv3': 31 | from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', ''] 32 | num_filters = [-1, -1, 512, 256, 256, 128] 33 | strides = [-1, -1, 2, 2, 2, 2] 34 | pads = [-1, -1, 1, 1, 1, 1] 35 | sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]] 36 | ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \ 37 | [1,2,.5], [1,2,.5]] 38 | normalizations = -1 39 | steps = [] 40 | return locals() 41 | ``` 42 | Here `from_layers` indicate the feature layer you would like to extract from the base network. 
def parse_args(argv=None):
    """
    parse command line options of the network visualization tool

    Parameters:
    ----------
    argv : list of str or None
        arguments to parse; None (the default) reads them from sys.argv

    Returns:
    ----------
    argparse.Namespace with network, num_classes, data_shape, train,
    output_dir and print_net
    """
    def str2bool(value):
        """ convert a command line token such as 'False' or '1' to a bool """
        # argparse's type=bool is just bool(str): every non-empty string,
        # including 'False' and '0', would be parsed as True
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser(description='network visualization')
    parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
                        help='the cnn to use')
    parser.add_argument('--num-classes', dest='num_classes', type=int, default=20,
                        help='the number of classes')
    parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
                        help='set image\'s shape')
    parser.add_argument('--train', dest='train', type=str2bool, default=False,
                        help='show train net')
    parser.add_argument('--output-dir', dest='output_dir', type=str,
                        default=os.path.dirname(__file__),
                        help='path of the output visualized net')
    parser.add_argument('--print-net', dest='print_net', type=str2bool, default=False,
                        help='print the network as json')
    args = parser.parse_args(argv)
    return args
def _rescale_factor(im_shape, target_size, max_size):
    """ scale mapping the short side to target_size, capped so the long side fits max_size """
    im_size_min = np.min(im_shape[0:2])
    # the long side must come from np.max; taking np.min here would make the
    # max_size cap below compare against the short side
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale


def rescale(im, target_size, max_size):
    """
    resize input image keeping its aspect ratio and return the scale used

    The short side is scaled to target_size unless that would make the long
    side exceed max_size, in which case the long side is scaled to max_size.

    Parameters:
    ----------
    im : numpy.array
        BGR image input by opencv
    target_size: int
        one dimensional size (the short side)
    max_size: int
        one dimensional max size (the long side)

    Returns:
    ----------
    tuple (numpy.array, float), rescaled image and the scale applied to both axes
    """
    im_scale = _rescale_factor(im.shape, target_size, max_size)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale
def namedtuple_with_defaults(typename, field_names, default_values=()):
    """
    create a namedtuple with default values

    Parameters:
    ----------
    typename : str
        name of the generated namedtuple class
    field_names : str or sequence of str
        field names, as accepted by collections.namedtuple
    default_values : mapping or sequence
        a mapping of field name to default, or positional defaults for the
        leading fields; fields without a default fall back to None

    Returns:
    ----------
    the namedtuple class, every field of which is optional
    """
    # the ABC aliases were removed from `collections` in Python 3.10; the
    # fallback keeps Python 2 working, where `collections.abc` does not exist
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    T = collections.namedtuple(typename, field_names)
    # first make every field optional so a partial prototype can be built
    T.__new__.__defaults__ = (None, ) * len(T._fields)
    if isinstance(default_values, Mapping):
        prototype = T(**default_values)
    else:
        prototype = T(*default_values)
    # the prototype now holds the final default of every field, in order
    T.__new__.__defaults__ = tuple(prototype)
    return T
class Imdb(object):
    """
    Common interface for image datasets; subclasses provide paths and labels

    Parameters:
    ----------
    name : str
        name of dataset
    """
    def __init__(self, name):
        self.name = name
        # subclasses are expected to fill these in
        self.classes = []
        self.num_classes = 0
        self.image_set_index = None
        self.num_images = 0
        self.labels = None
        self.padding = 0

    def image_path_from_index(self, index):
        """
        return the full path of the image at the given index

        Parameters:
        ----------
        index : int
            index of image requested in dataset

        Returns:
        ----------
        full path of specified image
        """
        raise NotImplementedError

    def label_from_index(self, index):
        """
        return the ground-truth of the image at the given index

        Parameters:
        ----------
        index : int
            index of image requested in dataset

        Returns:
        ----------
        object ground-truths, in format
        numpy.array([id, xmin, ymin, xmax, ymax]...)
        """
        raise NotImplementedError

    def save_imglist(self, fname=None, root=None, shuffle=False):
        """
        write one tab-separated line per labelled image to a list file

        Each line holds: index, the constant 2, label width, all label values
        with four decimals, and the image path. Images whose label is empty
        are left out.

        Parameters:
        ----------
        fname : str
            saved filename, defaults to '<name>.lst'
        root : str
            if given, image paths are written relative to this directory
        shuffle : bool
            whether to shuffle the lines before writing

        Raises:
        ----------
        RuntimeError if no image has a non-empty label
        """
        def progress_bar(count, total, suffix=''):
            import sys
            bar_len = 24
            fraction = count / float(total)
            filled_len = int(round(bar_len * fraction))
            percents = round(100.0 * fraction, 1)
            bar = '=' * filled_len + '-' * (bar_len - filled_len)
            sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
            sys.stdout.flush()

        entries = []
        for index in range(self.num_images):
            progress_bar(index, self.num_images)
            label = self.label_from_index(index)
            if label.size < 1:
                continue
            path = self.image_path_from_index(index)
            if root:
                path = osp.relpath(path, root)
            fields = [str(index), str(2), str(label.shape[1])]
            fields.extend("{0:.4f}".format(x) for x in label.ravel())
            fields.append(path)
            entries.append('\t'.join(fields) + '\n')

        if not entries:
            raise RuntimeError("No image in imdb")
        if shuffle:
            import random
            random.shuffle(entries)
        if not fname:
            fname = self.name + '.lst'
        with open(fname, 'w') as f:
            f.writelines(entries)

    def _load_class_names(self, filename, dirname):
        """
        read class names from a text file, one stripped name per line

        Parameters:
        ----------
        filename: str
            file stores class names
        dirname: str
            file directory
        """
        with open(osp.join(dirname, filename), 'r') as f:
            return [line.strip() for line in f]
| """ 69 | assert index >= 0 and index < self.num_images, "index out of range" 70 | pos = self.image_set_index[index] 71 | for k, v in enumerate(self.imdbs): 72 | if pos >= v.num_images: 73 | pos -= v.num_images 74 | else: 75 | return (k, pos) 76 | 77 | def image_path_from_index(self, index): 78 | """ 79 | given image index, find out full path 80 | 81 | Parameters 82 | ---------- 83 | index: int 84 | index of a specific image 85 | 86 | Returns 87 | ---------- 88 | full path of this image 89 | """ 90 | assert self.image_set_index is not None, "Dataset not initialized" 91 | pos = self.image_set_index[index] 92 | n_db, n_index = self._locate_index(index) 93 | return self.imdbs[n_db].image_path_from_index(n_index) 94 | 95 | def label_from_index(self, index): 96 | """ 97 | given image index, return preprocessed ground-truth 98 | 99 | Parameters 100 | ---------- 101 | index: int 102 | index of a specific image 103 | 104 | Returns 105 | ---------- 106 | ground-truths of this image 107 | """ 108 | assert self.image_set_index is not None, "Dataset not initialized" 109 | pos = self.image_set_index[index] 110 | n_db, n_index = self._locate_index(index) 111 | return self.imdbs[n_db].label_from_index(n_index) 112 | -------------------------------------------------------------------------------- /dataset/mscoco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os 3 | import numpy as np 4 | from .imdb import Imdb 5 | from .pycocotools.coco import COCO 6 | 7 | 8 | class Coco(Imdb): 9 | """ 10 | Implementation of Imdb for MSCOCO dataset: https://http://mscoco.org 11 | 12 | Parameters: 13 | ---------- 14 | anno_file : str 15 | annotation file for coco, a json file 16 | image_dir : str 17 | image directory for coco images 18 | shuffle : bool 19 | whether initially shuffle image list 20 | 21 | """ 22 | def __init__(self, anno_file, image_dir, shuffle=True, names='mscoco.names'): 23 | assert 
os.path.isfile(anno_file), "Invalid annotation file: " + anno_file 24 | basename = os.path.splitext(os.path.basename(anno_file))[0] 25 | super(Coco, self).__init__('coco_' + basename) 26 | self.image_dir = image_dir 27 | 28 | self.classes = self._load_class_names(names, 29 | os.path.join(os.path.dirname(__file__), 'names')) 30 | 31 | self.num_classes = len(self.classes) 32 | self._load_all(anno_file, shuffle) 33 | self.num_images = len(self.image_set_index) 34 | 35 | 36 | def image_path_from_index(self, index): 37 | """ 38 | given image index, find out full path 39 | 40 | Parameters: 41 | ---------- 42 | index: int 43 | index of a specific image 44 | Returns: 45 | ---------- 46 | full path of this image 47 | """ 48 | assert self.image_set_index is not None, "Dataset not initialized" 49 | name = self.image_set_index[index] 50 | image_file = os.path.join(self.image_dir, 'images', name) 51 | assert os.path.isfile(image_file), 'Path does not exist: {}'.format(image_file) 52 | return image_file 53 | 54 | def label_from_index(self, index): 55 | """ 56 | given image index, return preprocessed ground-truth 57 | 58 | Parameters: 59 | ---------- 60 | index: int 61 | index of a specific image 62 | Returns: 63 | ---------- 64 | ground-truths of this image 65 | """ 66 | assert self.labels is not None, "Labels not processed" 67 | return self.labels[index] 68 | 69 | def _load_all(self, anno_file, shuffle): 70 | """ 71 | initialize all entries given annotation json file 72 | 73 | Parameters: 74 | ---------- 75 | anno_file: str 76 | annotation json file 77 | shuffle: bool 78 | whether to shuffle image list 79 | """ 80 | image_set_index = [] 81 | labels = [] 82 | coco = COCO(anno_file) 83 | img_ids = coco.getImgIds() 84 | for img_id in img_ids: 85 | # filename 86 | image_info = coco.loadImgs(img_id)[0] 87 | filename = image_info["file_name"] 88 | subdir = filename.split('_')[1] 89 | height = image_info["height"] 90 | width = image_info["width"] 91 | # label 92 | anno_ids = 
def evaluate_net(net, path_imgrec, num_classes, mean_pixels, data_shape,
                 model_prefix, epoch, ctx=mx.cpu(), batch_size=1,
                 path_imglist="", nms_thresh=0.45, force_nms=False,
                 ovp_thresh=0.5, use_difficult=False, class_names=None,
                 voc07_metric=False, frequent=20):
    """
    evaluate a network on a validation record file and print each metric

    Parameters:
    ----------
    net : str or None
        Network name or use None to load from json without modifying
    path_imgrec : str
        path to the record validation file
    path_imglist : str
        path to the list file to replace labels in record file, optional
    num_classes : int
        number of classes, not including background
    mean_pixels : tuple
        (mean_r, mean_g, mean_b); accepted but not read by this function
    data_shape : tuple or int
        (3, height, width) or height/width
    model_prefix : str
        model prefix of saved checkpoint
    epoch : int
        load model epoch
    ctx : mx.ctx
        mx.gpu() or mx.cpu()
    batch_size : int
        validation batch size
    nms_thresh : float
        non-maximum suppression threshold
    force_nms : boolean
        whether suppress different class objects
    ovp_thresh : float
        AP overlap threshold for true/false positives
    use_difficult : boolean
        whether to use difficult objects in evaluation if applicable
    class_names : comma separated str
        class names in string, must correspond to num_classes if set
    voc07_metric : boolean
        whether to use 11-point evaluation as in VOC07 competition
    frequent : int
        frequency to print out validation status
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # normalize an int shape to (channels, height, width)
    if isinstance(data_shape, int):
        data_shape = (3, data_shape, data_shape)
    assert len(data_shape) == 3 and data_shape[0] == 3

    # validation iterator, configured from cfg.valid
    eval_iter = DetRecordIter(path_imgrec, batch_size, data_shape,
                              path_imglist=path_imglist, **cfg.valid)

    # checkpoint: stored symbol plus parameters
    load_net, args, auxs = mx.model.load_checkpoint(model_prefix, epoch)

    # rebuild the symbol by name, or fall back to the one stored on disk
    if net is not None:
        net = get_symbol(net, data_shape[1], num_classes=num_classes,
                         nms_thresh=nms_thresh, force_suppress=force_nms)
    else:
        net = load_net
    # the module below is built with a 'label' input, so add one if missing
    if 'label' not in net.list_arguments():
        label = mx.sym.Variable(name='label')
        net = mx.sym.Group([net, label])

    # every argument is marked fixed: evaluation only
    mod = mx.mod.Module(net, label_names=('label',), logger=logger, context=ctx,
                        fixed_param_names=net.list_arguments())
    mod.bind(data_shapes=eval_iter.provide_data, label_shapes=eval_iter.provide_label)
    mod.set_params(args, auxs, allow_missing=False, force_init=True)

    # both metrics take the same arguments; only the class differs
    metric_cls = VOC07MApMetric if voc07_metric else MApMetric
    roc_dir = os.path.join(os.path.dirname(model_prefix), 'roc')
    metric = metric_cls(ovp_thresh, use_difficult, class_names,
                        roc_output_path=roc_dir)

    speedometer = mx.callback.Speedometer(batch_size,
                                          frequent=frequent,
                                          auto_reset=False)
    results = mod.score(eval_iter, metric, num_batch=None,
                        batch_end_callback=speedometer)
    for metric_name, metric_value in results:
        print("{}: {}".format(metric_name, metric_value))
name="relu2_1") 25 | conv2_2 = mx.symbol.Convolution( 26 | data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2") 27 | relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2") 28 | pool2 = mx.symbol.Pooling( 29 | data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2") 30 | # group 3 31 | conv3_1 = mx.symbol.Convolution( 32 | data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1") 33 | relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1") 34 | conv3_2 = mx.symbol.Convolution( 35 | data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2") 36 | relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2") 37 | conv3_3 = mx.symbol.Convolution( 38 | data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3") 39 | relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3") 40 | pool3 = mx.symbol.Pooling( 41 | data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \ 42 | pooling_convention="full", name="pool3") 43 | # group 4 44 | conv4_1 = mx.symbol.Convolution( 45 | data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1") 46 | relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1") 47 | conv4_2 = mx.symbol.Convolution( 48 | data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2") 49 | relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2") 50 | conv4_3 = mx.symbol.Convolution( 51 | data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3") 52 | relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3") 53 | pool4 = mx.symbol.Pooling( 54 | data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4") 55 | # group 5 56 | conv5_1 = mx.symbol.Convolution( 57 | data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1") 58 | relu5_1 = mx.symbol.Activation(data=conv5_1, 
act_type="relu", name="relu5_1") 59 | conv5_2 = mx.symbol.Convolution( 60 | data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2") 61 | relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2") 62 | conv5_3 = mx.symbol.Convolution( 63 | data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3") 64 | relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3") 65 | pool5 = mx.symbol.Pooling( 66 | data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1), 67 | pad=(1,1), name="pool5") 68 | # group 6 69 | conv6 = mx.symbol.Convolution( 70 | data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6), 71 | num_filter=1024, name="fc6") 72 | relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6") 73 | # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6") 74 | # group 7 75 | conv7 = mx.symbol.Convolution( 76 | data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="fc7") 77 | relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7") 78 | # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7") 79 | 80 | gpool = mx.symbol.Pooling(data=relu7, pool_type='avg', kernel=(7, 7), 81 | global_pool=True, name='global_pool') 82 | conv8 = mx.symbol.Convolution(data=gpool, num_filter=num_classes, kernel=(1, 1), 83 | name='fc8') 84 | flat = mx.symbol.Flatten(data=conv8) 85 | softmax = mx.symbol.SoftmaxOutput(data=flat, name='softmax') 86 | return softmax 87 | -------------------------------------------------------------------------------- /symbol/mobilenet.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
def Conv(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''):
    """
    Bias-free convolution followed by batch normalization and ReLU.

    The three layers are named ``<name><suffix>_conv2d``,
    ``<name><suffix>_batchnorm`` and ``<name><suffix>_relu``.

    Returns:
    ----------
    the ReLU activation symbol
    """
    prefix = '%s%s' % (name, suffix)
    out = mx.sym.Convolution(
        data=data, num_filter=num_filter, kernel=kernel, num_group=num_group,
        stride=stride, pad=pad, no_bias=True, name=prefix + '_conv2d')
    out = mx.sym.BatchNorm(data=out, name=prefix + '_batchnorm', fix_gamma=True)
    return mx.sym.Activation(data=out, act_type='relu', name=prefix + '_relu')


def get_symbol(num_classes, **kwargs):
    """
    Build the MobileNet classification symbol.

    Parameters:
    ----------
    num_classes : int
        number of hidden units of the final fully connected layer
    kwargs : dict
        accepted for signature compatibility, not used by this builder

    Returns:
    ----------
    the ``softmax`` SoftmaxOutput symbol
    """
    # (block index, depthwise stride, pointwise output channels); the
    # depthwise conv of every block keeps the channel count of its input.
    blocks = (
        (2, 1, 64),
        (3, 2, 128),
        (4, 1, 128),
        (5, 2, 256),
        (6, 1, 256),
        (7, 2, 512),
        (8, 1, 512),
        (9, 1, 512),
        (10, 1, 512),
        (11, 1, 512),
        (12, 1, 512),
        (13, 2, 1024),
        (14, 1, 1024),
    )

    channels = 32
    net = mx.symbol.Variable(name="data")
    net = Conv(net, num_filter=channels, kernel=(3, 3), pad=(1, 1),
               stride=(2, 2), name="conv_1")
    for index, dw_stride, out_channels in blocks:
        # depthwise 3x3: one group per input channel
        net = Conv(net, num_group=channels, num_filter=channels, kernel=(3, 3),
                   pad=(1, 1), stride=(dw_stride, dw_stride),
                   name="conv_%d_dw" % index)
        # pointwise 1x1
        net = Conv(net, num_filter=out_channels, kernel=(1, 1), pad=(0, 0),
                   stride=(1, 1), name="conv_%d" % index)
        channels = out_channels

    net = mx.sym.Pooling(data=net, kernel=(7, 7), stride=(1, 1), pool_type="avg",
                         name="global_pool", global_pool=True)
    net = mx.sym.Flatten(data=net, name="flatten")
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc')
    return mx.symbol.SoftmaxOutput(data=net, name='softmax')
def _str2bool(value):
    """Parse a command-line token such as 'true'/'False'/'1'/'no' into a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


def parse_args():
    """
    Parse the command line of the evaluation script.

    Boolean options (--force, --use-difficult, --voc07) take an explicit
    value. They are parsed with _str2bool rather than ``type=bool``, because
    ``bool('False')`` is True and the flags could otherwise never be
    switched off from the command line.

    Returns:
    ----------
    argparse.Namespace with the parsed options
    """
    parser = argparse.ArgumentParser(description='Evaluate a network')
    parser.add_argument('--rec-path', dest='rec_path', help='which record file to use',
                        default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
    parser.add_argument('--list-path', dest='list_path', help='which list file to use',
                        default="", type=str)
    parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
                        help='which network to use')
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
                        help='evaluation batch size')
    parser.add_argument('--num-class', dest='num_class', type=int, default=20,
                        help='number of classes')
    parser.add_argument('--class-names', dest='class_names', type=str,
                        default='aeroplane, bicycle, bird, boat, bottle, bus, '
                                'car, cat, chair, cow, diningtable, dog, horse, motorbike, '
                                'person, pottedplant, sheep, sofa, train, tvmonitor',
                        help='string of comma separated names, or text filename')
    parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
                        default=0, type=int)
    parser.add_argument('--prefix', dest='prefix', help='load model prefix',
                        default=os.path.join(os.getcwd(), 'model', 'ssd_'), type=str)
    parser.add_argument('--gpus', dest='gpu_id', help='GPU devices to evaluate with',
                        default='0', type=str)
    parser.add_argument('--cpu', dest='cpu', help='use cpu to evaluate, this can be slow',
                        action='store_true')
    parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
                        help='set image shape')
    parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
                        help='red mean value')
    parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
                        help='green mean value')
    parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
                        help='blue mean value')
    parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.45,
                        help='non-maximum suppression threshold')
    parser.add_argument('--overlap', dest='overlap_thresh', type=float, default=0.5,
                        help='evaluation overlap threshold')
    parser.add_argument('--force', dest='force_nms', type=_str2bool, default=False,
                        help='force non-maximum suppression on different class')
    parser.add_argument('--use-difficult', dest='use_difficult', type=_str2bool, default=False,
                        help='use difficult ground-truths in evaluation')
    parser.add_argument('--voc07', dest='use_voc07_metric', type=_str2bool, default=True,
                        help='use PASCAL VOC 07 metric')
    parser.add_argument('--deploy', dest='deploy_net', help='Load network from model',
                        action='store_true', default=False)
    parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
                        default=20, type=int)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    # choose ctx
    if args.cpu:
        ctx = mx.cpu()
    else:
        ctx = [mx.gpu(int(i)) for i in args.gpu_id.split(',')]
    # parse # classes and class_names if applicable
    num_class = args.num_class
    if len(args.class_names) > 0:
        if os.path.isfile(args.class_names):
            # try to open it to read class names
            with open(args.class_names, 'r') as f:
                class_names = [l.strip() for l in f.readlines()]
        else:
            class_names = [c.strip() for c in args.class_names.split(',')]
        assert len(class_names) == num_class
        for name in class_names:
            assert len(name) > 0
    else:
        class_names = None

    # with --deploy the symbol is taken from the checkpoint instead of being rebuilt
    network = None if args.deploy_net else args.network
    # a prefix ending in '_' is completed with the network name
    if args.prefix.endswith('_'):
        prefix = args.prefix + args.network
    else:
        prefix = args.prefix
    evaluate_net(network, args.rec_path, num_class,
                 (args.mean_r, args.mean_g, args.mean_b), args.data_shape,
                 prefix, args.epoch, ctx, batch_size=args.batch_size,
                 path_imglist=args.list_path, nms_thresh=args.nms_thresh,
                 force_nms=args.force_nms, ovp_thresh=args.overlap_thresh,
                 use_difficult=args.use_difficult, class_names=class_names,
                 voc07_metric=args.use_voc07_metric, frequent=args.frequent)
def _str2bool(value):
    """Parse a command-line token such as 'true'/'False'/'1'/'no' into a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


def _split_csv(text):
    """Split a comma separated string into stripped, non-empty tokens."""
    return [token.strip() for token in text.split(',') if token.strip()]


def load_pascal(image_set, year, devkit_path, shuffle=False, class_names=None, true_negative=None):
    """
    wrapper function for loading pascal voc dataset

    Parameters:
    ----------
    image_set : str
        train, trainval... or combinations splitted by comma
    year : str
        2007, 2012 or combinations splitted by comma
    devkit_path : str
        root directory of dataset
    shuffle : bool
        whether to shuffle initial list
    class_names : str
        forwarded to PascalVoc as ``class_names``
    true_negative : bool
        forwarded to PascalVoc as ``true_negative_images``

    Returns:
    ----------
    Imdb
    """
    # empty tokens are dropped so that the two asserts below can actually fire
    # ("".split(",") returns [""], which is a non-empty list)
    image_set = _split_csv(image_set)
    assert image_set, "No image_set specified"
    year = _split_csv(year)
    assert year, "No year specified"

    # make sure (# sets == # years): a single value is broadcast to the other list
    if len(image_set) > 1 and len(year) == 1:
        year = year * len(image_set)
    if len(image_set) == 1 and len(year) > 1:
        image_set = image_set * len(year)
    assert len(image_set) == len(year), "Number of sets and year mismatch"

    imdbs = [PascalVoc(s, y, devkit_path, shuffle, is_train=True,
                       class_names=class_names, true_negative_images=true_negative)
             for s, y in zip(image_set, year)]
    if len(imdbs) > 1:
        return ConcatDB(imdbs, shuffle)
    return imdbs[0]


def load_coco(image_set, dirname, shuffle=False):
    """
    wrapper function for loading ms coco dataset

    Parameters:
    ----------
    image_set : str
        train2014, val2014, valminusminival2014, minival2014
        or combinations splitted by comma
    dirname: str
        root dir for coco
    shuffle: boolean
        initial shuffle

    Returns:
    ----------
    Imdb
    """
    anno_files = ['instances_' + name + '.json' for name in _split_csv(image_set)]
    assert anno_files, "No image set specified"
    imdbs = [Coco(os.path.join(dirname, 'annotations', af), dirname, shuffle=shuffle)
             for af in anno_files]
    if len(imdbs) > 1:
        return ConcatDB(imdbs, shuffle)
    return imdbs[0]


def parse_args():
    """
    Parse the command line of the dataset preparation script.

    --shuffle and --true-negative take an explicit value. They are parsed
    with _str2bool rather than ``type=bool``, because ``bool('False')`` is
    True and the flags could otherwise never be switched off.

    Returns:
    ----------
    argparse.Namespace with the parsed options
    """
    parser = argparse.ArgumentParser(description='Prepare lists for dataset')
    parser.add_argument('--dataset', dest='dataset', help='dataset to use',
                        default='pascal', type=str)
    parser.add_argument('--year', dest='year', help='which year to use',
                        default='2007,2012', type=str)
    parser.add_argument('--set', dest='set', help='train, val, trainval, test',
                        default='trainval', type=str)
    parser.add_argument('--target', dest='target', help='output list file',
                        default=None,
                        type=str)
    parser.add_argument('--class-names', dest='class_names', type=str,
                        default=None, help='string of comma separated names, or text filename')
    parser.add_argument('--root', dest='root_path', help='dataset root path',
                        default=os.path.join(curr_path, '..', 'data', 'VOCdevkit'),
                        type=str)
    parser.add_argument('--shuffle', dest='shuffle', help='shuffle list',
                        type=_str2bool, default=True)
    parser.add_argument('--true-negative', dest='true_negative', help='use images with no GT as true_negative',
                        type=_str2bool, default=False)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    if args.class_names is not None:
        assert args.target is not None, 'for a subset of classes, specify a target path. Its for your own safety'
    if args.dataset == 'pascal':
        db = load_pascal(args.set, args.year, args.root_path, args.shuffle, args.class_names, args.true_negative)
        print("saving list to disk...")
        db.save_imglist(args.target, root=args.root_path)
    elif args.dataset == 'coco':
        db = load_coco(args.set, args.root_path, args.shuffle)
        print("saving list to disk...")
        db.save_imglist(args.target, root=args.root_path)
    else:
        raise NotImplementedError("No implementation for dataset: " + args.dataset)

    print("List file {} generated...".format(args.target))

    im2rec_path = os.path.join(mxnet.__path__[0], 'tools/im2rec.py')
    # final validation - sometimes __path__ (or __file__) gives 'mxnet/python/mxnet' instead of 'mxnet'
    if not os.path.exists(im2rec_path):
        im2rec_path = os.path.join(os.path.dirname(os.path.dirname(mxnet.__path__[0])), 'tools/im2rec.py')
    subprocess.check_call(["python", im2rec_path,
                           os.path.abspath(args.target), os.path.abspath(args.root_path),
                           "--shuffle", str(int(args.shuffle)), "--pack-label", "1"])

    # splitext keeps directory components that contain dots (e.g. "./data/x.lst"),
    # which target.split('.')[0] would truncate to an empty string
    print("Record file {} generated...".format(os.path.splitext(args.target)[0] + '.rec'))
class YoloFormat(Imdb):
    """
    Base class for loading datasets as used in YOLO

    Parameters:
    ----------
    name : str
        name for this dataset
    classes : list or tuple of str
        class names in this dataset, or the path of a text file
        holding one class name per line
    list_file : str
        filename of the image list file
    image_dir : str
        image directory
    label_dir : str
        label directory
    extension : str
        by default .jpg
    label_extension : str
        by default .txt
    shuffle : bool
        whether to shuffle the initial order when loading this dataset,
        default is True
    """
    def __init__(self, name, classes, list_file, image_dir, label_dir,
                 extension='.jpg', label_extension='.txt', shuffle=True):
        if isinstance(classes, (list, tuple)):
            num_classes = len(classes)
        elif isinstance(classes, str):
            # a string is treated as the path of a class-name file
            with open(classes, 'r') as f:
                classes = [l.strip() for l in f.readlines()]
            num_classes = len(classes)
        else:
            raise ValueError("classes should be list/tuple or text file")
        assert num_classes > 0, "number of classes must > 0"
        # the number of classes is appended to the dataset name
        super(YoloFormat, self).__init__(name + '_' + str(num_classes))
        self.classes = classes
        self.num_classes = num_classes
        self.list_file = list_file
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.extension = extension
        self.label_extension = label_extension

        self.image_set_index = self._load_image_set_index(shuffle)
        self.num_images = len(self.image_set_index)
        self.labels = self._load_image_labels()

    def _load_image_set_index(self, shuffle):
        """
        read the image names listed in ``self.list_file``, one per line

        Parameters:
        ----------
        shuffle : boolean
            whether to shuffle the image list
        Returns:
        ----------
        entire list of images specified in the setting
        """
        assert os.path.exists(self.list_file), 'Path does not exists: {}'.format(self.list_file)
        with open(self.list_file, 'r') as f:
            image_set_index = [x.strip() for x in f.readlines()]
        if shuffle:
            np.random.shuffle(image_set_index)
        return image_set_index

    def image_path_from_index(self, index):
        """
        given image index, find out full path

        Parameters:
        ----------
        index: int
            index of a specific image
        Returns:
        ----------
        full path of this image
        """
        assert self.image_set_index is not None, "Dataset not initialized"
        name = self.image_set_index[index]
        image_file = os.path.join(self.image_dir, name) + self.extension
        assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
        return image_file

    def label_from_index(self, index):
        """
        given image index, return preprocessed ground-truth

        Parameters:
        ----------
        index: int
            index of a specific image
        Returns:
        ----------
        ground-truths of this image
        """
        assert self.labels is not None, "Labels not processed"
        return self.labels[index]

    def _label_path_from_index(self, index):
        """
        given image name, find out annotation path

        Parameters:
        ----------
        index: str
            an entry of ``self.image_set_index`` (the image name, not its
            position), to which the label extension is appended

        Returns:
        ----------
        full path of annotation file
        """
        label_file = os.path.join(self.label_dir, index + self.label_extension)
        assert os.path.exists(label_file), 'Path does not exist: {}'.format(label_file)
        return label_file

    def _load_image_labels(self):
        """
        preprocess all ground-truths

        Every non-blank line of a label file must hold
        ``cls_id x y width height``; the centre/size box is converted to
        ``[cls_id, xmin, ymin, xmax, ymax]``.

        Returns:
        ----------
        list with one numpy array per image, each row being
        [cls_id, xmin, ymin, xmax, ymax]
        """
        temp = []

        # load ground-truths
        for idx in self.image_set_index:
            label_file = self._label_path_from_index(idx)
            with open(label_file, 'r') as f:
                label = []
                for line in f:
                    temp_label = line.strip().split()
                    if not temp_label:
                        # tolerate blank lines (e.g. a trailing newline)
                        continue
                    assert len(temp_label) == 5, "Invalid label file: " + label_file
                    cls_id = int(temp_label[0])
                    x = float(temp_label[1])
                    y = float(temp_label[2])
                    half_width = float(temp_label[3]) / 2
                    half_height = float(temp_label[4]) / 2
                    label.append([cls_id, x - half_width, y - half_height,
                                  x + half_width, y + half_height])
                temp.append(np.array(label))
        return temp
def get_caffe_iter(layer_names, layers):
    """
    Yield (name, type, blobs) for layers loaded through pycaffe.

    The name of the layer at position i is taken from ``layer_names[i]``,
    with every '-' and '/' replaced by '_'.
    """
    for position, layer in enumerate(layers):
        yield (re.sub('[-/]', '_', layer_names[position]), layer.type, layer.blobs)


def get_iter(layers):
    """
    Yield (name, type, blobs) for layers that carry their own ``name``.

    Every '-' and '/' in the name is replaced by '_'.
    """
    for layer in layers:
        yield (re.sub('[-/]', '_', layer.name), layer.type, layer.blobs)


def main():
    """
    Convert the parameters of a Caffe model into an MXNet checkpoint.

    Reads the prototxt and caffemodel given on the command line, copies the
    blobs of Convolution / InnerProduct (also legacy type ids 4 and 14),
    PReLU and Normalize layers into MXNet arrays and saves them together
    with the converted symbol as epoch 1 of ``save_model_name``.
    """
    cli = argparse.ArgumentParser(description='Caffe prototxt to mxnet model parameter converter.\
                    Note that only basic functions are implemented. You are welcomed to contribute to this file.')
    cli.add_argument('caffe_prototxt', help='The prototxt file in Caffe format')
    cli.add_argument('caffe_model', help='The binary model parameter file in Caffe format')
    cli.add_argument('save_model_name', help='The name of the output model prefix')
    args = cli.parse_args()

    prob, input_dim = proto2symbol(args.caffe_prototxt)

    # load the layers with pycaffe when available, else with the protobuf parser
    if caffe_flag:
        caffe.set_mode_cpu()
        net_caffe = caffe.Net(args.caffe_prototxt, args.caffe_model, caffe.TEST)
        layer_names = net_caffe._layer_names
        layers = net_caffe.layers
    else:
        layers = parse.parse_caffemodel(args.caffe_model)

    arg_shapes, output_shapes, aux_shapes = prob.infer_shape(data=tuple(input_dim))
    arg_shape_dic = dict(zip(prob.list_arguments(), arg_shapes))
    arg_params = {}

    if caffe_flag:
        layer_iter = get_caffe_iter(layer_names, layers)
    else:
        layer_iter = get_iter(layers)

    handled_types = ('Convolution', 'InnerProduct', 4, 14, 'PReLU', 'Normalize')
    first_conv = True

    for layer_name, layer_type, layer_blobs in layer_iter:
        if layer_type not in handled_types:
            continue

        if layer_type == 'PReLU':
            assert (len(layer_blobs) == 1)
            slope = layer_blobs[0].data
            gamma_name = layer_name + '_gamma'
            arg_params[gamma_name] = mx.nd.zeros(slope.shape)
            arg_params[gamma_name][:] = slope
            continue

        if layer_type == 'Normalize':
            assert (len(layer_blobs) == 1)
            scale_name = layer_name + '_scale'
            scale = layer_blobs[0].data
            scale_shape = (1, len(scale), 1, 1)
            arg_params[scale_name] = mx.nd.zeros(scale_shape)
            arg_params[scale_name][:] = np.array(list(scale)).reshape(scale_shape)
            continue

        # Convolution / InnerProduct: one weight blob and one bias blob
        assert (len(layer_blobs) == 2)
        weight_blob, bias_blob = layer_blobs[0], layer_blobs[1]
        if getattr(weight_blob.shape, 'dim', None) is None:
            wmat_dim = list(weight_blob.shape)
        elif len(weight_blob.shape.dim) > 0:
            wmat_dim = weight_blob.shape.dim
        else:
            # empty shape.dim: fall back to the num/channels/height/width fields
            wmat_dim = [weight_blob.num, weight_blob.channels, weight_blob.height,
                        weight_blob.width]
        wmat = np.array(weight_blob.data).reshape(wmat_dim)
        bias = np.array(bias_blob.data)
        channels = wmat_dim[1]
        if channels in (3, 4) and first_conv:  # RGB or RGBA
            print('Swapping BGR of caffe into RGB in mxnet')
            wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :]

        assert (wmat.flags['C_CONTIGUOUS'] is True)
        assert (bias.flags['C_CONTIGUOUS'] is True)
        print('converting layer {0}, wmat shape = {1}, bias shape = {2}'.format(layer_name, wmat.shape, bias.shape))
        wmat = wmat.reshape((wmat.shape[0], -1))
        bias = bias.reshape((bias.shape[0], 1))
        weight_name = layer_name + "_weight"
        bias_name = layer_name + "_bias"

        if weight_name not in arg_shape_dic:
            print(weight_name + ' not found in arg_shape_dic.')
            continue

        wmat = wmat.reshape(arg_shape_dic[weight_name])
        arg_params[weight_name] = mx.nd.zeros(wmat.shape)
        arg_params[weight_name][:] = wmat

        bias = bias.reshape(arg_shape_dic[bias_name])
        arg_params[bias_name] = mx.nd.zeros(bias.shape)
        arg_params[bias_name][:] = bias

        # only the first converted convolution gets the channel swap above
        if first_conv and layer_type in ('Convolution', 4):
            first_conv = False

    model = mx.mod.Module(symbol=prob, label_names=None)
    model.bind(data_shapes=[('data', tuple(input_dim))])
    model.init_params(arg_params=arg_params, aux_params={})

    model.save_checkpoint(args.save_model_name, 1)


if __name__ == '__main__':
    main()
def parse_args():
    """
    parse command line arguments of the detection demo

    Boolean options (--force, --timer) accept true/false, yes/no, 1/0
    (case-insensitive). They used to be declared with type=bool, which maps
    every non-empty string, including "False", to True.

    Returns:
    ----------
    argparse.Namespace with the parsed options
    """
    def str2bool(value):
        """convert a command line token into a real boolean"""
        token = value.strip().lower()
        if token in ('true', 't', 'yes', 'y', '1'):
            return True
        # the empty string stays falsy, as it was with bool('')
        if token in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(value))

    parser = argparse.ArgumentParser(description='Single-shot detection network demo')
    parser.add_argument('--network', dest='network', type=str, default='resnet50',
                        help='which network to use')
    parser.add_argument('--images', dest='images', type=str, default='./data/demo/dog.jpg',
                        help='run demo with images, use comma to seperate multiple images')
    parser.add_argument('--dir', dest='dir', nargs='?',
                        help='demo image directory, optional', type=str)
    parser.add_argument('--ext', dest='extension', help='image extension, optional',
                        type=str, nargs='?')
    parser.add_argument('--epoch', dest='epoch', help='epoch of trained model',
                        default=0, type=int)
    parser.add_argument('--prefix', dest='prefix', help='trained model prefix',
                        default=os.path.join(os.getcwd(), 'model', 'ssd_'),
                        type=str)
    parser.add_argument('--cpu', dest='cpu', help='(override GPU) use CPU to detect',
                        action='store_true', default=False)
    parser.add_argument('--gpu', dest='gpu_id', type=int, default=0,
                        help='GPU device id to detect with')
    parser.add_argument('--data-shape', dest='data_shape', type=int, default=512,
                        help='set image shape')
    parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
                        help='red mean value')
    parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
                        help='green mean value')
    parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
                        help='blue mean value')
    # help text now states the default that is actually configured
    parser.add_argument('--thresh', dest='thresh', type=float, default=0.5,
                        help='object visualize score threshold, default 0.5')
    parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.5,
                        help='non-maximum suppression threshold, default 0.5')
    parser.add_argument('--force', dest='force_nms', type=str2bool, default=True,
                        help='force non-maximum suppression on different class')
    parser.add_argument('--timer', dest='show_timer', type=str2bool, default=True,
                        help='show detection time')
    parser.add_argument('--deploy', dest='deploy_net', action='store_true', default=False,
                        help='Load network from json file, rather than from symbol')
    # whitespace around the names is irrelevant: parse_class_names strips it
    parser.add_argument('--class-names', dest='class_names', type=str,
                        default='aeroplane, bicycle, bird, boat, bottle, bus, '
                                'car, cat, chair, cow, diningtable, dog, horse, motorbike, '
                                'person, pottedplant, sheep, sofa, train, tvmonitor',
                        help='string of comma separated names, or text filename')
    args = parser.parse_args()
    return args
if __name__ == '__main__':
    # demo entry point: parse options, build the detector, run it on the images
    args = parse_args()
    if args.cpu:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu_id)

    # parse image list; empty entries (e.g. from "--images ''" or a trailing
    # comma) are dropped, otherwise the check below could never fail because
    # ''.split(',') still yields one (empty) element
    image_list = [i.strip() for i in args.images.split(',') if i.strip()]
    assert len(image_list) > 0, "No valid image specified to detect"

    # with --deploy the symbol comes from the checkpoint, not from the factory
    network = None if args.deploy_net else args.network
    class_names = parse_class_names(args.class_names)
    # a prefix ending in '_' is completed with "<network>_<data_shape>"
    if args.prefix.endswith('_'):
        prefix = args.prefix + args.network + '_' + str(args.data_shape)
    else:
        prefix = args.prefix
    detector = get_detector(network, prefix, args.epoch,
                            args.data_shape,
                            (args.mean_r, args.mean_g, args.mean_b),
                            ctx, len(class_names), args.nms_thresh, args.force_nms)
    # run detection
    detector.detect_and_visualize(image_list, args.dir, args.extension,
                                  class_names, args.thresh, args.show_timer)
def voc_ap(rec, prec, use_07_metric=False):
    """
    average precision calculations
    [precision integrated to recall]
    :param rec: recall values (array or plain sequence)
    :param prec: precision values, same length as rec
    :param use_07_metric: 2007 metric is 11-recall-point based AP
    :return: average precision
    """
    # accept plain sequences as well as arrays
    rec = np.asarray(rec, dtype=float)
    prec = np.asarray(prec, dtype=float)
    if use_07_metric:
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            # precision counts as 0 at recall levels that are never reached
            p = np.max(prec[reached]) if np.sum(reached) > 0 else 0
            ap += p / 11.
    else:
        # append sentinel values at both ends
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute precision integration ladder (running maximum from the right)
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # look for recall value changes
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # sum (\delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.5, use_07_metric=False):
    """
    pascal voc evaluation
    :param detpath: detection results detpath.format(classname); every line is
        "image_id confidence xmin ymin xmax ymax", separated by single spaces
    :param annopath: annotations annopath.format(image_filename)
    :param imageset_file: text file containing list of images
    :param classname: category name
    :param cache_dir: caching annotations, created when missing
    :param ovthresh: overlap threshold
    :param use_07_metric: whether to use voc07's 11 point ap computation
    :return: rec, prec, ap
    """
    if not os.path.isdir(cache_dir):
        os.mkdir(cache_dir)
    cache_file = os.path.join(cache_dir, 'annotations.pkl')
    with open(imageset_file, 'r') as f:
        lines = f.readlines()
    image_filenames = [x.strip() for x in lines]

    # load annotations from cache
    if not os.path.isfile(cache_file):
        recs = {}
        for ind, image_filename in enumerate(image_filenames):
            recs[image_filename] = parse_voc_rec(annopath.format(image_filename))
            if ind % 100 == 0:
                print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames)))
        print('saving annotations cache to {:s}'.format(cache_file))
        with open(cache_file, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # NOTE: the cache is unpickled as-is, only point cache_dir at
        # directories whose content is trusted
        with open(cache_file, 'rb') as f:
            recs = pickle.load(f)

    # extract objects in :param classname:
    class_recs = {}
    npos = 0
    for image_filename in image_filenames:
        objects = [obj for obj in recs[image_filename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in objects])
        # builtin bool: the np.bool alias was removed from NumPy
        difficult = np.array([x['difficult'] for x in objects]).astype(bool)
        det = [False] * len(objects)  # stand for detected
        # difficult objects do not count as positives
        npos += int(np.sum(~difficult))
        class_recs[image_filename] = {'bbox': bbox,
                                      'difficult': difficult,
                                      'det': det}

    # read detections, blank lines are ignored
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        splitlines = [x.strip().split(' ') for x in f.readlines() if x.strip()]

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    bbox = np.array([[float(z) for z in x[2:]] for x in splitlines])

    nd = len(image_ids)
    if nd > 0:
        # sort by confidence; skipped without detections because the empty
        # bbox array is one-dimensional and cannot be indexed by row
        sorted_inds = np.argsort(-confidence)
        bbox = bbox[sorted_inds, :]
        image_ids = [image_ids[x] for x in sorted_inds]

    # go down detections and mark true positives and false positives
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        r = class_recs[image_ids[d]]
        bb = bbox[d, :].astype(float)
        ovmax = -np.inf
        bbgt = r['bbox'].astype(float)

        if bbgt.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(bbgt[:, 0], bb[0])
            iymin = np.maximum(bbgt[:, 1], bb[1])
            ixmax = np.minimum(bbgt[:, 2], bb[2])
            iymax = np.minimum(bbgt[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (bbgt[:, 2] - bbgt[:, 0] + 1.) *
                   (bbgt[:, 3] - bbgt[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match with a difficult object is neither tp nor fp
            if not r['difficult'][jmax]:
                if not r['det'][jmax]:
                    tp[d] = 1.
                    r['det'][jmax] = True
                else:
                    # ground truth already claimed by a more confident detection
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid division by zero in case first detection matches a difficult ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
def __init__(self, symbol, model_prefix, epoch, data_shape, mean_pixels, \
             batch_size=1, ctx=None):
    """
    load a checkpoint and bind a module ready for detection

    When symbol is None the symbol stored with the checkpoint is used.
    When ctx is None the module runs on mx.cpu().
    """
    # resolve the device first, everything below must use the resolved value
    self.ctx = mx.cpu() if ctx is None else ctx
    load_symbol, args, auxs = mx.model.load_checkpoint(model_prefix, epoch)
    if symbol is None:
        symbol = load_symbol
    # pass self.ctx, not the raw argument: the raw argument may be None,
    # which would bypass the CPU default chosen above
    self.mod = mx.mod.Module(symbol, label_names=None, context=self.ctx)
    self.data_shape = data_shape
    self.mean_pixels = mean_pixels
    # square input of data_shape x data_shape with 3 channels
    self.mod.bind(data_shapes=[('data', (batch_size, 3, data_shape, data_shape))])
    self.mod.set_params(args, auxs)
path or list of image paths 86 | root_dir : str 87 | directory of input images, optional if image path already 88 | has full directory information 89 | extension : str 90 | image extension, eg. ".jpg", optional 91 | 92 | Returns: 93 | ---------- 94 | list of detection results in format [det0, det1...], det is in 95 | format np.array([id, score, xmin, ymin, xmax, ymax]...) 96 | """ 97 | test_db = TestDB(im_list, root_dir=root_dir, extension=extension) 98 | test_iter = DetIter(test_db, 1, self.data_shape, self.mean_pixels, 99 | is_train=False) 100 | return self.detect(test_iter, show_timer) 101 | 102 | def visualize_detection(self, img, dets, classes=[], thresh=0.6): 103 | """ 104 | visualize detections in one image 105 | 106 | Parameters: 107 | ---------- 108 | img : numpy.array 109 | image, in bgr format 110 | dets : numpy.array 111 | ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...]) 112 | each row is one object 113 | classes : tuple or list of str 114 | class names 115 | thresh : float 116 | score threshold 117 | """ 118 | import matplotlib.pyplot as plt 119 | import random 120 | plt.imshow(img) 121 | height = img.shape[0] 122 | width = img.shape[1] 123 | colors = dict() 124 | for i in range(dets.shape[0]): 125 | cls_id = int(dets[i, 0]) 126 | if cls_id >= 0: 127 | score = dets[i, 1] 128 | if score > thresh: 129 | if cls_id not in colors: 130 | colors[cls_id] = (random.random(), random.random(), random.random()) 131 | xmin = int(dets[i, 2] * width) 132 | ymin = int(dets[i, 3] * height) 133 | xmax = int(dets[i, 4] * width) 134 | ymax = int(dets[i, 5] * height) 135 | rect = plt.Rectangle((xmin, ymin), xmax - xmin, 136 | ymax - ymin, fill=False, 137 | edgecolor=colors[cls_id], 138 | linewidth=3.5) 139 | plt.gca().add_patch(rect) 140 | class_name = str(cls_id) 141 | if classes and len(classes) > cls_id: 142 | class_name = classes[cls_id] 143 | plt.gca().text(xmin, ymin - 2, 144 | '{:s} {:.3f}'.format(class_name, score), 145 | 
def import_module(module_name):
    """Helper function to import module

    The directory containing this file is added to sys.path, so modules
    located next to this file can be imported by their bare name. The entry
    is added only once; previously every call appended a duplicate.

    Parameters
    ----------
    module_name : str
        name of the module to import

    Returns
    -------
    the imported module object
    """
    import importlib
    import os
    import sys
    here = os.path.dirname(__file__)
    if here not in sys.path:
        sys.path.append(here)
    return importlib.import_module(module_name)
including background 23 | from_layers : list of str 24 | feature extraction layers, use '' for add extra layers 25 | For example: 26 | from_layers = ['relu4_3', 'fc7', '', '', '', ''] 27 | which means extract feature from relu4_3 and fc7, adding 4 extra layers 28 | on top of fc7 29 | num_filters : list of int 30 | number of filters for extra layers, you can use -1 for extracted features, 31 | however, if normalization and scale is applied, the number of filter for 32 | that layer must be provided. 33 | For example: 34 | num_filters = [512, -1, 512, 256, 256, 256] 35 | strides : list of int 36 | strides for the 3x3 convolution appended, -1 can be used for extracted 37 | feature layers 38 | pads : list of int 39 | paddings for the 3x3 convolution, -1 can be used for extracted layers 40 | sizes : list or list of list 41 | [min_size, max_size] for all layers or [[], [], []...] for specific layers 42 | ratios : list or list of list 43 | [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers 44 | normalizations : int or list of int 45 | use normalizations value for all layers or [...] for specific layers, 46 | -1 indicate no normalizations and scales 47 | steps : list 48 | specify steps for each MultiBoxPrior layer, leave empty, it will calculate 49 | according to layer dimensions 50 | min_filter : int 51 | minimum number of filters used in 1x1 convolution 52 | nms_thresh : float 53 | non-maximum suppression threshold 54 | force_suppress : boolean 55 | whether suppress different class objects 56 | nms_topk : int 57 | apply NMS to top K detections 58 | minimum_negative_samples : int 59 | always have some negative examples, no matter how many positive there are. 60 | this is useful when training on images with no ground-truth. 
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
               strides, pads, normalizations=-1, steps=[], min_filter=128,
               nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
    """Build the SSD symbol used for testing / inference

    Parameters
    ----------
    network : str
        base network symbol name, imported as a module via import_module
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, '' requests an extra layer, e.g.
        ['relu4_3', 'fc7', '', '', '', ''] extracts relu4_3 and fc7 and
        adds 4 extra layers on top of fc7
    num_filters : list of int
        number of filters for extra layers, -1 can be used for extracted
        features unless normalization and scale is applied to that layer,
        e.g. [512, -1, 512, 256, 256, 256]
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] per layer
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] per layer
    strides : list of int
        strides for the appended 3x3 convolutions, -1 for extracted layers
    pads : list of int
        paddings for the appended 3x3 convolutions, -1 for extracted layers
    normalizations : int or list of int
        one value for all layers or [...] per layer, -1 indicates no
        normalizations and scales
    steps : list
        steps for each MultiBoxPrior layer, when left empty they are
        calculated according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    kwargs : dict
        forwarded untouched to the base network's get_symbol

    Returns
    -------
    mx.Symbol

    """
    # base network, selected by module name
    backbone = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
    # feature maps the detection heads are attached to
    feature_maps = multi_layer_feature(backbone, from_layers, num_filters,
                                       strides, pads, min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        feature_maps, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)

    # class scores are normalised along the channel axis before decoding
    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                           name='cls_prob')
    return mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection", nms_threshold=nms_thresh,
        force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
"Identity Mappings in Deep Residual Networks" 8 | ''' 9 | import mxnet as mx 10 | 11 | def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False): 12 | """Return ResNet Unit symbol for building ResNet 13 | Parameters 14 | ---------- 15 | data : str 16 | Input data 17 | num_filter : int 18 | Number of output channels 19 | bnf : int 20 | Bottle neck channels factor with regard to num_filter 21 | stride : tupe 22 | Stride used in convolution 23 | dim_match : Boolen 24 | True means channel number between input and output is the same, otherwise means differ 25 | name : str 26 | Base name of the operators 27 | workspace : int 28 | Workspace used in convolution operator 29 | """ 30 | if bottle_neck: 31 | # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper 32 | bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1') 33 | act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1') 34 | conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0), 35 | no_bias=True, workspace=workspace, name=name + '_conv1') 36 | bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2') 37 | act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2') 38 | conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=stride, pad=(1,1), 39 | no_bias=True, workspace=workspace, name=name + '_conv2') 40 | bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3') 41 | act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3') 42 | conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True, 43 | workspace=workspace, name=name + '_conv3') 44 | if dim_match: 45 | shortcut = data 46 | else: 47 | 
def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
    """Return ResNet symbol of
    Parameters
    ----------
    units : list
        Number of units in each stage
    num_stages : int
        Number of stage, must equal len(units)
    filter_list : list
        Channel size of the stem convolution followed by the size of each stage
    num_classes : int
        Ouput size of symbol
    image_shape : sequence of 3 int
        (channels, height, width) of the input, only height is used here:
        a height of at most 32 selects the small 3x3 stem
    bottle_neck : bool
        Passed to every residual_unit
    bn_mom : float
        Momentum of all BatchNorm layers, including those inside the
        residual units
    workspace : int
        Workspace used in convolution operator
    memonger : bool
        Passed to every residual_unit
    """
    num_unit = len(units)
    assert(num_unit == num_stages)
    data = mx.sym.Variable(name='data')
    data = mx.sym.identity(data=data, name='id')
    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
    (_, height, _) = image_shape
    # NOTE(review): the dump this was recovered from lost its indentation;
    # bn0/relu0/pooling are assumed to belong to the large-image branch only,
    # as in the upstream ResNet example - confirm against the real file
    if height <= 32:            # such as cifar10
        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
                                  no_bias=True, name="conv0", workspace=workspace)
    else:                       # often expected to be 224 such as imagenet
        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
                                  no_bias=True, name="conv0", workspace=workspace)
        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
        body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
        body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')

    for i in range(num_stages):
        # first unit of a stage changes the channel count; every stage but
        # the first also halves the resolution
        stage_stride = (1, 1) if i == 0 else (2, 2)
        # bn_mom is forwarded so a non-default momentum reaches the units too
        body = residual_unit(body, filter_list[i+1], stage_stride, False,
                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck,
                             bn_mom=bn_mom, workspace=workspace, memonger=memonger)
        for j in range(units[i]-1):
            body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
                                 bottle_neck=bottle_neck, bn_mom=bn_mom, workspace=workspace,
                                 memonger=memonger)
    bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
    relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
    # Although kernel is not used here when global_pool=True, we should put one
    pool1 = mx.symbol.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
    flat = mx.symbol.Flatten(data=pool1)
    fc1 = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
    return mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool("False")``, which is ``True``
    because the string is non-empty.  This converter accepts the usual
    spellings so ``--voc07 False`` actually disables the option.

    Parameters
    ----------
    value : str or bool
        Raw token from the command line (or an already-parsed bool).

    Returns
    -------
    bool

    Raises
    ------
    argparse.ArgumentTypeError
        If the token is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, matching what bool('') used to give.
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def parse_args():
    """Build the training command-line parser and parse ``sys.argv``.

    Returns
    -------
    argparse.Namespace
        Parsed options; attribute names are the ``dest`` values below.
    """
    parser = argparse.ArgumentParser(description='Train a Single-shot detection network')
    parser.add_argument('--train-path', dest='train_path', help='train record to use',
                        default=os.path.join(os.getcwd(), 'data', 'train.rec'), type=str)
    parser.add_argument('--train-list', dest='train_list', help='train list to use',
                        default="", type=str)
    parser.add_argument('--val-path', dest='val_path', help='validation record to use',
                        default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
    parser.add_argument('--val-list', dest='val_list', help='validation list to use',
                        default="", type=str)
    parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
                        help='which network to use')
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
                        help='training batch size')
    parser.add_argument('--resume', dest='resume', type=int, default=-1,
                        help='resume training from epoch n')
    parser.add_argument('--finetune', dest='finetune', type=int, default=-1,
                        help='finetune from epoch n, rename the model before doing this')
    parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
                        default=os.path.join(os.getcwd(), 'model', 'vgg16_reduced'), type=str)
    parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
                        default=1, type=int)
    parser.add_argument('--prefix', dest='prefix', help='new model prefix',
                        default=os.path.join(os.getcwd(), 'output', 'exp1', 'ssd'), type=str)
    parser.add_argument('--gpus', dest='gpus', help='GPU devices to train with',
                        default='0', type=str)
    parser.add_argument('--begin-epoch', dest='begin_epoch', help='begin epoch of training',
                        default=0, type=int)
    parser.add_argument('--end-epoch', dest='end_epoch', help='end epoch of training',
                        default=240, type=int)
    parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
                        default=20, type=int)
    parser.add_argument('--data-shape', dest='data_shape', type=int, default=300,
                        help='set image shape')
    parser.add_argument('--label-width', dest='label_width', type=int, default=350,
                        help='force padding label width to sync across train and validation')
    parser.add_argument('--optimizer', dest='optimizer', type=str, default='sgd',
                        help='Whether to use a different optimizer or follow the original code with sgd')
    parser.add_argument('--lr', dest='learning_rate', type=float, default=0.004,
                        help='learning rate')
    parser.add_argument('--momentum', dest='momentum', type=float, default=0.9,
                        help='momentum')
    parser.add_argument('--wd', dest='weight_decay', type=float, default=0.0005,
                        help='weight decay')
    parser.add_argument('--mean-r', dest='mean_r', type=float, default=123,
                        help='red mean value')
    parser.add_argument('--mean-g', dest='mean_g', type=float, default=117,
                        help='green mean value')
    parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
                        help='blue mean value')
    parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='80, 160',
                        help='refactor learning rate at specified epochs')
    # Was type=str: the default was already the float 0.1, but a value given on
    # the command line arrived as a string.  Parse it so both paths agree.
    parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=float, default=0.1,
                        help='ratio to refactor learning rate')
    parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="^(conv1_|conv2_).*",
                        help='freeze layer pattern')
    parser.add_argument('--log', dest='log_file', type=str, default="train.log",
                        help='save training log to file')
    parser.add_argument('--monitor', dest='monitor', type=int, default=0,
                        help='log network parameters every N iters if larger than 0')
    parser.add_argument('--pattern', dest='monitor_pattern', type=str, default=".*",
                        help='monitor parameter pattern, as regex')
    parser.add_argument('--num-class', dest='num_class', type=int, default=20,
                        help='number of classes')
    parser.add_argument('--num-example', dest='num_example', type=int, default=16551,
                        help='number of image examples')
    # Names are stripped individually by parse_class_names, so the padding
    # between entries is irrelevant.
    parser.add_argument('--class-names', dest='class_names', type=str,
                        default='aeroplane, bicycle, bird, boat, bottle, bus, '
                                'car, cat, chair, cow, diningtable, dog, horse, motorbike, '
                                'person, pottedplant, sheep, sofa, train, tvmonitor',
                        help='string of comma separated names, or text filename')
    parser.add_argument('--nms', dest='nms_thresh', type=float, default=0.45,
                        help='non-maximum suppression threshold')
    parser.add_argument('--nms_topk', dest='nms_topk', type=int, default=400,
                        help='final number of detections')
    parser.add_argument('--overlap', dest='overlap_thresh', type=float, default=0.5,
                        help='evaluation overlap threshold')
    # The four switches below used type=bool, which turned "False" into True.
    parser.add_argument('--force', dest='force_nms', type=_str2bool, default=False,
                        help='force non-maximum suppression on different class')
    parser.add_argument('--use-difficult', dest='use_difficult', type=_str2bool, default=False,
                        help='use difficult ground-truths in evaluation')
    parser.add_argument('--voc07', dest='use_voc07_metric', type=_str2bool, default=True,
                        help='use PASCAL VOC 07 11-point metric')
    parser.add_argument('--tensorboard', dest='tensorboard', type=_str2bool, default=False,
                        help='save metrics into tensorboard readable files')
    parser.add_argument('--min_neg_samples', dest='min_neg_samples', type=int, default=0,
                        help='min number of negative samples taken in hard mining.')

    args = parser.parse_args()
    return args


def parse_class_names(args):
    """Resolve the class-name list from the parsed arguments.

    ``args.class_names`` is either a path to a text file (one name per line)
    or a comma-separated string.  An empty value yields ``None``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``num_class`` and ``class_names``.

    Returns
    -------
    list of str or None

    Raises
    ------
    AssertionError
        If the number of names differs from ``args.num_class`` or a name is
        empty after stripping.
    """
    num_class = args.num_class
    if len(args.class_names) > 0:
        if os.path.isfile(args.class_names):
            # try to open it to read class names
            with open(args.class_names, 'r') as f:
                class_names = [l.strip() for l in f.readlines()]
        else:
            class_names = [c.strip() for c in args.class_names.split(',')]
        assert len(class_names) == num_class, str(len(class_names))
        for name in class_names:
            assert len(name) > 0
    else:
        class_names = None
    return class_names


if __name__ == '__main__':
    args = parse_args()
    # context list
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = [mx.cpu()] if not ctx else ctx
    # class names if applicable
    class_names = parse_class_names(args)
    # start training
    train_net(args.network, args.train_path,
              args.num_class, args.batch_size,
              args.data_shape, [args.mean_r, args.mean_g, args.mean_b],
              args.resume, args.finetune, args.pretrained,
              args.epoch, args.prefix, ctx, args.begin_epoch, args.end_epoch,
              args.frequent, args.learning_rate, args.momentum, args.weight_decay,
              args.lr_refactor_step, args.lr_refactor_ratio,
              val_path=args.val_path,
              min_neg_samples=args.min_neg_samples,
              num_example=args.num_example,
              class_names=class_names,
              label_pad_width=args.label_width,
              freeze_layer_pattern=args.freeze_pattern,
              iter_monitor=args.monitor,
              monitor_pattern=args.monitor_pattern,
              log_file=args.log_file,
              nms_thresh=args.nms_thresh,
              nms_topk=args.nms_topk,
              force_nms=args.force_nms,
              ovp_thresh=args.overlap_thresh,
              use_difficult=args.use_difficult,
              voc07_metric=args.use_voc07_metric,
              optimizer=args.optimizer,
              tensorboard=args.tensorboard)
def BasicBlock(data, growth_rate, stride, name, bottle_neck=True, drop_out=0.0, bn_mom=0.9, workspace=512):
    """Build one BN-ReLU-Conv unit used inside a DenseBlock.

    Parameters
    ----------
    data : mx.Symbol
        Input symbol.
    growth_rate : int
        Number of output channels produced by the unit.
    stride : tuple
        Stride of the 3x3 convolution in the bottleneck variant.  The plain
        variant always convolves with stride (1, 1) and does not read this.
    name : str
        Prefix for every operator name created here.
    bottle_neck : bool
        When true, a 1x1 convolution with ``4 * growth_rate`` filters precedes
        the 3x3 convolution.
    drop_out : float
        Dropout probability; dropout layers are only added when it is > 0.
    bn_mom : float
        BatchNorm momentum.
    workspace : int
        Workspace passed to the convolution operators.

    Returns
    -------
    mx.Symbol
        Output of the last convolution (or its dropout); concatenation with
        the input is left to the caller.
    """
    def _with_dropout(sym, tag):
        # Dropout is optional and keeps the '<name>_dpN' naming.
        if drop_out > 0:
            return mx.symbol.Dropout(data=sym, p=drop_out, name=name + tag)
        return sym

    # Both variants open with the same pre-activation pair.
    normed = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
    activated = mx.sym.Activation(data=normed, act_type='relu', name=name + '_relu1')

    if not bottle_neck:
        plain = mx.sym.Convolution(data=activated, num_filter=int(growth_rate), kernel=(3,3), stride=(1,1), pad=(1,1),
                                   no_bias=True, workspace=workspace, name=name + '_conv1')
        return _with_dropout(plain, '_dp1')

    # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
    squeezed = mx.sym.Convolution(data=activated, num_filter=int(growth_rate*4), kernel=(1,1), stride=(1,1), pad=(0,0),
                                  no_bias=True, workspace=workspace, name=name + '_conv1')
    squeezed = _with_dropout(squeezed, '_dp1')
    normed2 = mx.sym.BatchNorm(data=squeezed, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
    activated2 = mx.sym.Activation(data=normed2, act_type='relu', name=name + '_relu2')
    expanded = mx.sym.Convolution(data=activated2, num_filter=int(growth_rate), kernel=(3,3), stride=stride, pad=(1,1),
                                  no_bias=True, workspace=workspace, name=name + '_conv2')
    return _with_dropout(expanded, '_dp2')


def DenseBlock(units_num, data, growth_rate, name, bottle_neck=True, drop_out=0.0, bn_mom=0.9, workspace=512):
    """Stack ``units_num`` BasicBlocks, concatenating each output onto the running input.

    Parameters
    ----------
    units_num : int
        Number of BasicBlock units in this dense block.
    data : mx.Symbol
        Input symbol.
    growth_rate : int
        Channels added by every unit.
    name : str
        Prefix; units are named ``<name>_unit<k>`` and the concatenations
        ``<name>_concat<k>`` with ``k`` starting at 1.
    bottle_neck, drop_out, bn_mom, workspace
        Forwarded unchanged to BasicBlock.

    Returns
    -------
    mx.Symbol
        The input concatenated with the output of every unit.
    """
    features = data
    for unit_no in range(1, units_num + 1):
        fresh = BasicBlock(features, growth_rate=growth_rate, stride=(1,1), name=name + '_unit%d' % unit_no,
                           bottle_neck=bottle_neck, drop_out=drop_out,
                           bn_mom=bn_mom, workspace=workspace)
        features = mx.symbol.Concat(features, fresh, name=name + '_concat%d' % unit_no)
    return features


def TransitionBlock(num_stage, data, num_filter, stride, name, drop_out=0.0, bn_mom=0.9, workspace=512):
    """Build the BN-ReLU-Conv(1x1)-AvgPool transition placed between dense blocks.

    Parameters
    ----------
    num_stage : int
        Stage index; only used to name the pooling layer ``_pool<num_stage+1>``.
    data : mx.Symbol
        Input symbol.
    num_filter : int
        Number of output channels of the 1x1 convolution.
    stride : tuple
        Stride of the 1x1 convolution.
    name : str
        Prefix for every operator name created here.
    drop_out : float
        Dropout probability; a dropout layer is only added when it is > 0.
    bn_mom : float
        BatchNorm momentum.
    workspace : int
        Workspace passed to the convolution operator.

    Returns
    -------
    mx.Symbol
        Output of the 2x2, stride-2 average pooling.
    """
    normed = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
    activated = mx.sym.Activation(data=normed, act_type='relu', name=name + '_relu1')
    reduced = mx.sym.Convolution(data=activated, num_filter=num_filter,
                                 kernel=(1,1), stride=stride, pad=(0,0), no_bias=True,
                                 workspace=workspace, name=name + '_conv1')
    if drop_out > 0:
        reduced = mx.symbol.Dropout(data=reduced, p=drop_out, name=name + '_dp1')
    pool_name = name + '_pool%d' % (num_stage+1)
    return mx.symbol.Pooling(reduced, global_pool=False, kernel=(2,2), stride=(2,2), pool_type='avg', name=pool_name)
growth_rate : int 127 | Number of output channels 128 | num_classes : int 129 | Ouput size of symbol 130 | data_type : str 131 | the type of dataset 132 | reduction : float 133 | Compression ratio. Default = 0.5 134 | drop_out : float 135 | Probability of an element to be zeroed. Default = 0.2 136 | workspace : int 137 | Workspace used in convolution operator 138 | """ 139 | num_unit = len(units) 140 | assert(num_unit == num_stage) 141 | init_channels = 2 * growth_rate 142 | n_channels = init_channels 143 | data = mx.sym.Variable(name='data') 144 | data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data') 145 | if data_type == 'imagenet': 146 | body = mx.sym.Convolution(data=data, num_filter=growth_rate*2, kernel=(7, 7), stride=(2,2), pad=(3, 3), 147 | no_bias=True, name="conv0", workspace=workspace) 148 | body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') 149 | body = mx.sym.Activation(data=body, act_type='relu', name='relu0') 150 | body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max') 151 | elif data_type == 'vggface': 152 | body = mx.sym.Convolution(data=data, num_filter=growth_rate*2, kernel=(7, 7), stride=(2,2), pad=(3, 3), 153 | no_bias=True, name="conv0", workspace=workspace) 154 | body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') 155 | body = mx.sym.Activation(data=body, act_type='relu', name='relu0') 156 | body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max') 157 | elif data_type == 'msface': 158 | body = mx.sym.Convolution(data=data, num_filter=growth_rate*2, kernel=(7, 7), stride=(2,2), pad=(3, 3), 159 | no_bias=True, name="conv0", workspace=workspace) 160 | body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') 161 | body = mx.sym.Activation(data=body, act_type='relu', name='relu0') 162 | body = mx.symbol.Pooling(data=body, 
def get_scales(min_scale=0.2, max_scale=0.9, num_layers=6):
    """Compute per-layer anchor scales, following the SSD arXiv paper.

    Parameters
    ----------
    min_scale : float
        Smallest regular scale, as a fraction of the image size.
    max_scale : float
        Largest regular scale, as a fraction of the image size.
    num_layers : int
        Number of layers that will have a detection head; must be >= 3.

    Returns
    -------
    list
        ``num_layers`` entries ``[size, sqrt(size * next_size)]``, one per
        feature layer.

    Raises
    ------
    ValueError
        If ``num_layers`` is smaller than 3 (the step would divide by zero
        or go negative).
    """
    if num_layers < 3:
        raise ValueError('num_layers must be at least 3, got {}'.format(num_layers))

    # this code follows the original implementation of wei liu
    # for more, look at ssd/score_ssd_pascal.py:310 in the original caffe implementation
    min_ratio = int(min_scale * 100)
    max_ratio = int(max_scale * 100)
    # Integer floor division gives the same step on Python 2 and 3.
    step = (max_ratio - min_ratio) // (num_layers - 2)
    min_sizes = []
    max_sizes = []
    # `range` instead of the Python-2-only `xrange`.
    for ratio in range(min_ratio, max_ratio + 1, step):
        min_sizes.append(ratio / 100.)
        max_sizes.append((ratio + step) / 100.)
    # The first detection layer gets an extra, half-sized scale.
    min_sizes = [int(100*min_scale / 2.0) / 100.0] + min_sizes
    max_sizes = [min_scale] + max_sizes

    # convert it back to this implementation's notation:
    scales = []
    for layer_idx in range(num_layers):
        scales.append([min_sizes[layer_idx], np.single(np.sqrt(min_sizes[layer_idx] * max_sizes[layer_idx]))])
    return scales


def get_config(network, data_shape, **kwargs):
    """Configuration factory for various networks

    The returned dict is ``locals()`` of this function, so it contains every
    local assigned in the selected branch plus ``network``, ``data_shape`` and
    ``kwargs``.  Do not introduce helper locals here: they would leak into the
    configuration that callers splat into the symbol builder.

    Parameters
    ----------
    network : str
        base network name, such as vgg_reduced, inceptionv3, resnet...
    data_shape : int
        input data dimension
    kwargs : dict
        extra arguments

    Returns
    -------
    dict
        Configuration for the requested network.

    Raises
    ------
    NotImplementedError
        If no preset exists for ``network``.
    """
    if network == 'vgg16_reduced':
        if data_shape >= 448:
            from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
            num_filters = [512, -1, 512, 256, 256, 256, 256]
            strides = [-1, -1, 2, 2, 2, 2, 1]
            pads = [-1, -1, 1, 1, 1, 1, 1]
            sizes = get_scales(min_scale=0.15, max_scale=0.9, num_layers=len(from_layers))
            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                      [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
            normalizations = [20, -1, -1, -1, -1, -1, -1]
            steps = [] if data_shape != 512 else [x / 512.0 for x in
                [8, 16, 32, 64, 128, 256, 512]]
        else:
            from_layers = ['relu4_3', 'relu7', '', '', '', '']
            num_filters = [512, -1, 512, 256, 256, 256]
            strides = [-1, -1, 2, 2, 1, 1]
            pads = [-1, -1, 1, 1, 0, 0]
            sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                      [1,2,.5], [1,2,.5]]
            normalizations = [20, -1, -1, -1, -1, -1]
            steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
        if not (data_shape == 300 or data_shape == 512):
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning('data_shape %d was not tested, use with caucious.' % data_shape)
        return locals()
    elif network == 'inceptionv3':
        if data_shape >= 448:
            from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
            num_filters = [-1, -1, 512, 256, 256, 128]
            strides = [-1, -1, 2, 2, 2, 2]
            pads = [-1, -1, 1, 1, 1, 1]
            sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                      [1,2,.5], [1,2,.5]]
            normalizations = -1
            steps = []
        else:
            from_layers = ['ch_concat_mixed_2_chconcat', 'ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '']
            num_filters = [-1, -1, -1, 256, 256, 128]
            strides = [-1, -1, -1, 2, 2, 2]
            pads = [-1, -1, -1, 1, 1, 1]
            sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                      [1,2,.5], [1,2,.5]]
            normalizations = -1
            steps = []
        return locals()
    elif network == 'resnet50':
        num_layers = 50
        image_shape = '3,224,224'  # resnet require it as shape check
        network = 'resnet'
        from_layers = ['_plus12', '_plus15', '', '', '', '']
        num_filters = [-1, -1, 512, 256, 256, 128]
        strides = [-1, -1, 2, 2, 2, 2]
        pads = [-1, -1, 1, 1, 1, 1]
        sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                  [1,2,.5], [1,2,.5]]
        normalizations = -1
        steps = []
        return locals()
    elif network == 'resnet101':
        num_layers = 101
        image_shape = '3,224,224'
        network = 'resnet'
        from_layers = ['_plus29', '_plus32', '', '', '', '']
        num_filters = [-1, -1, 512, 256, 256, 128]
        strides = [-1, -1, 2, 2, 2, 2]
        pads = [-1, -1, 1, 1, 1, 1]
        sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                  [1,2,.5], [1,2,.5]]
        normalizations = -1
        steps = []
        return locals()
    elif network == 'mobilenet':
        from_layers = ['conv_12_relu', 'conv_14_relu', '', '', '', '', '']
        num_filters = [-1, -1, 512, 256, 256, 256, 256]
        strides = [-1, -1, 2, 2, 2, 2, 2]
        pads = [-1, -1, 1, 1, 1, 1, 1]
        sizes = get_scales(min_scale=0.15, max_scale=0.9, num_layers=len(from_layers))
        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                  [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
        normalizations = -1
        steps = []
        return locals()
    elif network == 'densenet121':
        network = 'densenet'
        data_type = 'imagenet'
        units = [6, 12, 24, 16]
        num_stage = 4
        growth_rate = 32
        bottle_neck = True
        from_layers = ['DBstage3_concat24', 'DBstage4_concat16', '', '', '', '']
        num_filters = [-1, -1, 256, 256, 256, 128]
        strides = [-1, -1, 2, 2, 2, 2]
        pads = [-1, -1, 1, 1, 1, 1]
        sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                  [1,2,.5], [1,2,.5]]
        normalizations = -1
        steps = []
        return locals()
    elif network == 'densenet-tiny':
        network = 'densenet'
        data_type = 'imagenet'
        units = [6, 12, 18, 12]
        num_stage = 4
        growth_rate = 16
        bottle_neck = True
        from_layers = ['DBstage2_concat12', 'DBstage3_concat18', '', '', '', '']
        num_filters = [-1, -1, 256, 256, 256, 128]
        strides = [-1, -1, 2, 2, 2, 2]
        pads = [-1, -1, 1, 1, 1, 1]
        sizes = get_scales(min_scale=0.2, max_scale=0.9, num_layers=len(from_layers))
        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3],
                  [1,2,.5], [1,2,.5]]
        normalizations = -1
        steps = []
        return locals()
    else:
        msg = 'No configuration found for %s with data_shape %d' % (network, data_shape)
        raise NotImplementedError(msg)


def get_symbol_train(network, data_shape, **kwargs):
    """Wrapper for get symbol for train

    Networks whose name starts with ``legacy`` are imported as modules and
    built directly; every other network goes through ``get_config`` and the
    generic symbol builder, with ``kwargs`` overriding the preset values.

    Parameters
    ----------
    network : str
        name for the base network symbol
    data_shape : int
        input shape
    kwargs : dict
        see symbol_builder.get_symbol_train for more details
    """
    if network.startswith('legacy'):
        logging.warning('Using legacy model.')
        return symbol_builder.import_module(network).get_symbol_train(**kwargs)
    config = get_config(network, data_shape, **kwargs).copy()
    config.update(kwargs)
    return symbol_builder.get_symbol_train(**config)


def get_symbol(network, data_shape, **kwargs):
    """Wrapper for get symbol for test

    Networks whose name starts with ``legacy`` are imported as modules and
    built directly; every other network goes through ``get_config`` and the
    generic symbol builder, with ``kwargs`` overriding the preset values.

    Parameters
    ----------
    network : str
        name for the base network symbol
    data_shape : int
        input shape
    kwargs : dict
        see symbol_builder.get_symbol for more details
    """
    if network.startswith('legacy'):
        logging.warning('Using legacy model.')
        return symbol_builder.import_module(network).get_symbol(**kwargs)
    config = get_config(network, data_shape, **kwargs).copy()
    config.update(kwargs)
    return symbol_builder.get_symbol(**config)
layers 11 | And the network is slightly smaller than original VGG 16 network 12 | This is a training network with losses 13 | 14 | Parameters: 15 | ---------- 16 | num_classes: int 17 | number of object classes not including background 18 | nms_thresh : float 19 | non-maximum suppression threshold 20 | force_suppress : boolean 21 | whether suppress different class objects 22 | nms_topk : int 23 | apply NMS to top K detections 24 | 25 | Returns: 26 | ---------- 27 | mx.Symbol 28 | """ 29 | data = mx.symbol.Variable(name="data") 30 | label = mx.symbol.Variable(name="label") 31 | 32 | # group 1 33 | conv1_1 = mx.symbol.Convolution( 34 | data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1") 35 | relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1") 36 | conv1_2 = mx.symbol.Convolution( 37 | data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2") 38 | relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2") 39 | pool1 = mx.symbol.Pooling( 40 | data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1") 41 | # group 2 42 | conv2_1 = mx.symbol.Convolution( 43 | data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1") 44 | relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1") 45 | conv2_2 = mx.symbol.Convolution( 46 | data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2") 47 | relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2") 48 | pool2 = mx.symbol.Pooling( 49 | data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2") 50 | # group 3 51 | conv3_1 = mx.symbol.Convolution( 52 | data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1") 53 | relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1") 54 | conv3_2 = mx.symbol.Convolution( 55 | data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2") 56 | relu3_2 = 
mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2") 57 | conv3_3 = mx.symbol.Convolution( 58 | data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3") 59 | relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3") 60 | pool3 = mx.symbol.Pooling( 61 | data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \ 62 | pooling_convention="full", name="pool3") 63 | # group 4 64 | conv4_1 = mx.symbol.Convolution( 65 | data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1") 66 | relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1") 67 | conv4_2 = mx.symbol.Convolution( 68 | data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2") 69 | relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2") 70 | conv4_3 = mx.symbol.Convolution( 71 | data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3") 72 | relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3") 73 | pool4 = mx.symbol.Pooling( 74 | data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4") 75 | # group 5 76 | conv5_1 = mx.symbol.Convolution( 77 | data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1") 78 | relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1") 79 | conv5_2 = mx.symbol.Convolution( 80 | data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2") 81 | relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2") 82 | conv5_3 = mx.symbol.Convolution( 83 | data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3") 84 | relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3") 85 | pool5 = mx.symbol.Pooling( 86 | data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1), 87 | pad=(1,1), name="pool5") 88 | # group 6 89 | conv6 = mx.symbol.Convolution( 90 | data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6), 91 | 
num_filter=1024, name="conv6") 92 | relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6") 93 | # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6") 94 | # group 7 95 | conv7 = mx.symbol.Convolution( 96 | data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7") 97 | relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7") 98 | # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7") 99 | 100 | ### ssd extra layers ### 101 | conv8_1, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \ 102 | stride=(1,1), act_type="relu", use_batchnorm=False) 103 | conv8_2, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \ 104 | stride=(2,2), act_type="relu", use_batchnorm=False) 105 | conv9_1, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \ 106 | stride=(1,1), act_type="relu", use_batchnorm=False) 107 | conv9_2, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \ 108 | stride=(2,2), act_type="relu", use_batchnorm=False) 109 | conv10_1, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \ 110 | stride=(1,1), act_type="relu", use_batchnorm=False) 111 | conv10_2, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(0,0), \ 112 | stride=(1,1), act_type="relu", use_batchnorm=False) 113 | conv11_1, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \ 114 | stride=(1,1), act_type="relu", use_batchnorm=False) 115 | conv11_2, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(0,0), \ 116 | stride=(1,1), act_type="relu", use_batchnorm=False) 117 | 118 | # specific parameters for VGG16 network 119 | from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2] 120 | sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]] 121 | ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], 
def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False,
               nms_topk=400, **kwargs):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is the detection network

    The training symbol is built first and its multibox prediction outputs
    are reused; a channel-wise softmax and the detection operator are then
    attached on top of them.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        threshold of overlap for non-maximum suppression
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    # Only the internal prediction heads of the training graph are needed.
    internals = get_symbol_train(num_classes).get_internals()
    cls_preds = internals["multibox_cls_pred_output"]
    loc_preds = internals["multibox_loc_pred_output"]
    anchor_boxes = internals["multibox_anchors_output"]

    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                           name='cls_prob')
    detection = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    return detection
import mxnet as mx


def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''):
    """Chain a bias-free Convolution, a BatchNorm and a ReLU Activation.

    The three symbols are named ``<name><suffix>_conv2d``,
    ``<name><suffix>_batchnorm`` and ``<name><suffix>_relu``; the ReLU
    symbol is returned.
    """
    conv_out = mx.sym.Convolution(
        data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad,
        no_bias=True, name='%s%s_conv2d' % (name, suffix))
    bn_out = mx.sym.BatchNorm(
        data=conv_out, name='%s%s_batchnorm' % (name, suffix), fix_gamma=True)
    return mx.sym.Activation(
        data=bn_out, act_type='relu', name='%s%s_relu' % (name, suffix))


def Inception7A(data,
                num_1x1,
                num_3x3_red, num_3x3_1, num_3x3_2,
                num_5x5_red, num_5x5,
                pool, proj,
                name):
    """Concatenate four branches: 1x1, 1x1->5x5, 1x1->3x3->3x3 and pool->1x1.

    ``pool`` is the pool_type of the 3x3 stride-1 pooling branch and ``proj``
    the filter count of the 1x1 Conv applied after it.
    """
    first_tower = '%s_tower' % name
    second_tower = '%s_tower_1' % name

    branch_1x1 = Conv(data, num_1x1, name=('%s_conv' % name))

    branch_5x5 = Conv(data, num_5x5_red, name=first_tower, suffix='_conv')
    branch_5x5 = Conv(branch_5x5, num_5x5, kernel=(5, 5), pad=(2, 2),
                      name=first_tower, suffix='_conv_1')

    branch_3x3 = Conv(data, num_3x3_red, name=second_tower, suffix='_conv')
    for filters, suffix in ((num_3x3_1, '_conv_1'), (num_3x3_2, '_conv_2')):
        branch_3x3 = Conv(branch_3x3, filters, kernel=(3, 3), pad=(1, 1),
                          name=second_tower, suffix=suffix)

    pooled = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                            pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
    branch_pool = Conv(pooled, proj, name=('%s_tower_2' % name), suffix='_conv')

    return mx.sym.Concat(*[branch_1x1, branch_5x5, branch_3x3, branch_pool],
                         name='ch_concat_%s_chconcat' % name)


# First Downsample
def Inception7B(data,
                num_3x3,
                num_d3x3_red, num_d3x3_1, num_d3x3_2,
                pool,
                name):
    """Concatenate a stride-2 3x3 branch, a 1x1->3x3->3x3(stride 2) branch
    and a stride-2 max-pooling branch.

    ``pool`` is accepted but not read: the pooling branch always uses "max".
    """
    tower = '%s_tower' % name

    strided_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
                       name=('%s_conv' % name))

    double_3x3 = Conv(data, num_d3x3_red, name=tower, suffix='_conv')
    double_3x3 = Conv(double_3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                      name=tower, suffix='_conv_1')
    double_3x3 = Conv(double_3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
                      name=tower, suffix='_conv_2')

    pooled = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0, 0),
                               pool_type="max", name=('max_pool_%s_pool' % name))

    return mx.sym.Concat(*[strided_3x3, double_3x3, pooled],
                         name='ch_concat_%s_chconcat' % name)


def Inception7C(data,
                num_1x1,
                num_d7_red, num_d7_1, num_d7_2,
                num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4,
                pool, proj,
                name):
    """Concatenate a 1x1 branch, two branches of alternating 1x7 / 7x1
    convolutions and a pool->1x1 branch."""
    d7_tower = '%s_tower' % name
    q7_tower = '%s_tower_1' % name

    branch_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))

    branch_d7 = Conv(data=data, num_filter=num_d7_red, name=d7_tower, suffix='_conv')
    for filters, kernel, pad, suffix in (
            (num_d7_1, (1, 7), (0, 3), '_conv_1'),
            (num_d7_2, (7, 1), (3, 0), '_conv_2')):
        branch_d7 = Conv(data=branch_d7, num_filter=filters, kernel=kernel, pad=pad,
                         name=d7_tower, suffix=suffix)

    branch_q7 = Conv(data=data, num_filter=num_q7_red, name=q7_tower, suffix='_conv')
    for filters, kernel, pad, suffix in (
            (num_q7_1, (7, 1), (3, 0), '_conv_1'),
            (num_q7_2, (1, 7), (0, 3), '_conv_2'),
            (num_q7_3, (7, 1), (3, 0), '_conv_3'),
            (num_q7_4, (1, 7), (0, 3), '_conv_4')):
        branch_q7 = Conv(data=branch_q7, num_filter=filters, kernel=kernel, pad=pad,
                         name=q7_tower, suffix=suffix)

    pooled = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                            pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
    branch_pool = Conv(data=pooled, num_filter=proj, kernel=(1, 1),
                       name=('%s_tower_2' % name), suffix='_conv')

    return mx.sym.Concat(*[branch_1x1, branch_d7, branch_q7, branch_pool],
                         name='ch_concat_%s_chconcat' % name)


def Inception7D(data,
                num_3x3_red, num_3x3,
                num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3,
                pool,
                name):
    """Concatenate a 1x1->3x3(stride 2) branch, a 1x1->1x7->7x1->3x3(stride 2)
    branch and a stride-2 pooling branch of type ``pool``."""
    first_tower = '%s_tower' % name
    second_tower = '%s_tower_1' % name

    branch_3x3 = Conv(data=data, num_filter=num_3x3_red, name=first_tower, suffix='_conv')
    branch_3x3 = Conv(data=branch_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0, 0),
                      stride=(2, 2), name=first_tower, suffix='_conv_1')

    branch_d7 = Conv(data=data, num_filter=num_d7_3x3_red, name=second_tower, suffix='_conv')
    branch_d7 = Conv(data=branch_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3),
                     name=second_tower, suffix='_conv_1')
    branch_d7 = Conv(data=branch_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0),
                     name=second_tower, suffix='_conv_2')
    branch_d7 = Conv(data=branch_d7, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2),
                     name=second_tower, suffix='_conv_3')

    pooled = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool,
                            name=('%s_pool_%s_pool' % (pool, name)))

    return mx.sym.Concat(*[branch_3x3, branch_d7, pooled],
                         name='ch_concat_%s_chconcat' % name)


def Inception7E(data,
                num_1x1,
                num_d3_red, num_d3_1, num_d3_2,
                num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2,
                pool, proj,
                name):
    """Concatenate six outputs: a 1x1 branch, the 1x3 and 3x1 heads of a
    1x1 tower, the 1x3 and 3x1 heads of a 1x1->3x3 tower, and a pool->1x1
    branch."""
    first_tower = '%s_tower' % name
    second_tower = '%s_tower_1' % name

    branch_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))

    stem_d3 = Conv(data=data, num_filter=num_d3_red, name=first_tower, suffix='_conv')
    head_d3_a = Conv(data=stem_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1),
                     name=first_tower, suffix='_mixed_conv')
    head_d3_b = Conv(data=stem_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0),
                     name=first_tower, suffix='_mixed_conv_1')

    stem_3x3 = Conv(data=data, num_filter=num_3x3_d3_red, name=second_tower, suffix='_conv')
    stem_3x3 = Conv(data=stem_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1),
                    name=second_tower, suffix='_conv_1')
    head_3x3_a = Conv(data=stem_3x3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1),
                      name=second_tower, suffix='_mixed_conv')
    head_3x3_b = Conv(data=stem_3x3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0),
                      name=second_tower, suffix='_mixed_conv_1')

    pooled = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                            pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
    branch_pool = Conv(data=pooled, num_filter=proj, kernel=(1, 1),
                       name=('%s_tower_2' % name), suffix='_conv')

    return mx.sym.Concat(
        *[branch_1x1, head_d3_a, head_d3_b, head_3x3_a, head_3x3_b, branch_pool],
        name='ch_concat_%s_chconcat' % name)


def get_symbol(num_classes=1000, **kwargs):
    """Assemble the Inception V3 classification symbol.

    Parameters:
    ----------
    num_classes : int
        number of hidden units of the final FullyConnected layer
    kwargs : dict
        accepted and ignored

    Returns:
    ----------
    mx.Symbol (the SoftmaxOutput named 'softmax')
    """
    net = mx.symbol.Variable(name="data")
    # stage 1
    net = Conv(net, 32, kernel=(3, 3), stride=(2, 2), name="conv")
    net = Conv(net, 32, kernel=(3, 3), name="conv_1")
    net = Conv(net, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
    # stage 2
    net = Conv(net, 80, kernel=(1, 1), name="conv_3")
    net = Conv(net, 192, kernel=(3, 3), name="conv_4")
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
    # stage 3: three 7A blocks that differ only in projection width and name
    for proj, block_name in ((32, "mixed"), (64, "mixed_1"), (64, "mixed_2")):
        net = Inception7A(net, 64,
                          64, 96, 96,
                          48, 64,
                          "avg", proj, block_name)
    net = Inception7B(net, 384,
                      64, 96, 96,
                      "max", "mixed_3")
    # stage 4: four 7C blocks that differ only in the inner 7x7 tower width
    for inner, block_name in ((128, "mixed_4"), (160, "mixed_5"),
                              (160, "mixed_6"), (192, "mixed_7")):
        net = Inception7C(net, 192,
                          inner, inner, 192,
                          inner, inner, inner, inner, 192,
                          "avg", 192, block_name)
    net = Inception7D(net, 192, 320,
                      192, 192, 192, 192,
                      "max", "mixed_8")
    # stage 5
    for pool_type, block_name in (("avg", "mixed_9"), ("max", "mixed_10")):
        net = Inception7E(net, 320,
                          384, 384, 384,
                          448, 384, 384, 384,
                          pool_type, 192, block_name)
    # pool
    net = mx.sym.Pooling(data=net, kernel=(8, 8), stride=(1, 1), pool_type="avg",
                         name="global_pool")
    net = mx.sym.Flatten(data=net, name="flatten")
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    return mx.symbol.SoftmaxOutput(data=net, name='softmax')
from __future__ import absolute_import
import mxnet as mx
from .common import legacy_conv_act_layer
from .common import multibox_layer


def _conv_relu_stack(data, stage, num_convs, num_filter):
    """Stack ``num_convs`` 3x3 (pad 1) Convolution + ReLU pairs.

    Symbols are named ``conv<stage>_<i>`` / ``relu<stage>_<i>`` with ``i``
    starting at 1; the last ReLU symbol is returned.
    """
    for idx in range(1, num_convs + 1):
        conv = mx.symbol.Convolution(
            data=data, kernel=(3, 3), pad=(1, 1), num_filter=num_filter,
            name="conv%d_%d" % (stage, idx))
        data = mx.symbol.Activation(
            data=conv, act_type="relu", name="relu%d_%d" % (stage, idx))
    return data


def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is a training network with losses

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1
    relu1_2 = _conv_relu_stack(data, 1, 2, 64)
    pool1 = mx.symbol.Pooling(
        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    relu2_2 = _conv_relu_stack(pool1, 2, 2, 128)
    pool2 = mx.symbol.Pooling(
        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3 (the only pooling layer that uses pooling_convention="full")
    relu3_3 = _conv_relu_stack(pool2, 3, 3, 256)
    pool3 = mx.symbol.Pooling(
        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2),
        pooling_convention="full", name="pool3")
    # group 4
    relu4_3 = _conv_relu_stack(pool3, 4, 3, 512)
    pool4 = mx.symbol.Pooling(
        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5
    relu5_3 = _conv_relu_stack(pool4, 5, 3, 512)
    pool5 = mx.symbol.Pooling(
        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
        pad=(1, 1), name="pool5")
    # group 6
    conv6 = mx.symbol.Convolution(
        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
        num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # group 7
    conv7 = mx.symbol.Convolution(
        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    ### ssd extra layers ###
    # (name suffix, num_filter, kernel, pad, stride), chained in this order
    extra_specs = (
        ("8_1", 256, (1, 1), (0, 0), (1, 1)),
        ("8_2", 512, (3, 3), (1, 1), (2, 2)),
        ("9_1", 128, (1, 1), (0, 0), (1, 1)),
        ("9_2", 256, (3, 3), (1, 1), (2, 2)),
        ("10_1", 128, (1, 1), (0, 0), (1, 1)),
        ("10_2", 256, (3, 3), (1, 1), (2, 2)),
        ("11_1", 128, (1, 1), (0, 0), (1, 1)),
        ("11_2", 256, (3, 3), (1, 1), (2, 2)),
        ("12_1", 128, (1, 1), (0, 0), (1, 1)),
        ("12_2", 256, (4, 4), (1, 1), (1, 1)),
    )
    extra_relus = {}
    feature = relu7
    for tag, num_filter, kernel, pad, stride in extra_specs:
        _, feature = legacy_conv_act_layer(
            feature, tag, num_filter, kernel=kernel, pad=pad,
            stride=stride, act_type="relu", use_batchnorm=False)
        extra_relus[tag] = feature

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7] + \
        [extra_relus[tag] for tag in ("8_2", "9_2", "10_2", "11_2", "12_2")]
    sizes = [[.07, .1025], [.15, .2121], [.3, .3674], [.45, .5196], [.6, .6708],
             [.75, .8216], [.9, .9721]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1./3], [1, 2, .5, 3, 1./3], [1, 2, .5, 3, 1./3],
              [1, 2, .5, 3, 1./3], [1, 2, .5], [1, 2, .5]]
    normalizations = [20, -1, -1, -1, -1, -1, -1]
    steps = [x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=False, interm_layer=0, steps=steps)

    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target,
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True,
        normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(
        loc_loss_, grad_scale=1., normalization='valid', name="loc_loss")

    # monitoring training status (grad_scale=0 on both outputs)
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        *[cls_prob, loc_preds, anchor_boxes],
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out


def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400,
               **kwargs):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is the detection network

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        threshold of overlap for non-maximum suppression
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    kwargs : dict
        accepted and ignored, matching the signature of the 300x300
        legacy variant's get_symbol

    Returns:
    ----------
    mx.Symbol
    """
    net = get_symbol_train(num_classes)
    # Look the internals up once and reuse them for all three outputs
    internals = net.get_internals()
    cls_preds = internals["multibox_cls_pred_output"]
    loc_preds = internals["multibox_loc_pred_output"]
    anchor_boxes = internals["multibox_anchors_output"]

    cls_prob = mx.symbol.SoftmaxActivation(
        data=cls_preds, mode='channel', name='cls_prob')
    out = mx.contrib.symbol.MultiBoxDetection(
        *[cls_prob, loc_preds, anchor_boxes],
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    return out
import math

import numpy as np


def _pad_labels(boxes, num_rows):
    """Append rows filled with -1 to an (m x 5) array until it has ``num_rows`` rows."""
    return np.pad(boxes, ((0, num_rows - boxes.shape[0]), (0, 0)),
                  'constant', constant_values=(-1, -1))


class RandSampler(object):
    """
    Random sampler base class, used for data augmentation

    Parameters:
    ----------
    max_trials : int
        maximum trials, if exceed this number, give up anyway
    max_sample : int
        maximum random crop samples to be generated
    """
    def __init__(self, max_trials, max_sample):
        assert max_trials > 0
        self.max_trials = int(max_trials)
        assert max_sample >= 0
        self.max_sample = int(max_sample)

    def sample(self, label):
        """
        Interface for calling sampling function

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths

        Returns:
        ----------
        list of (crop_box, label) tuples, if failed, return empty list []

        Raises:
        ----------
        NotImplementedError
            always; subclasses must override this method
        """
        # Raise rather than return the exception class, so a subclass that
        # forgets to override fails loudly instead of handing back a
        # non-list object.
        raise NotImplementedError


class RandCropper(RandSampler):
    """
    Random cropping original images with various settings

    Parameters:
    ----------
    min_scale : float
        minimum crop scale, (0, 1]
    max_scale : float
        maximum crop scale, (0, 1], must larger than min_scale
    min_aspect_ratio : float
        minimum crop aspect ratio, (0, 1]
    max_aspect_ratio : float
        maximum crop aspect ratio, [1, inf)
    min_overlap : float
        threshold of minimum overlap between a rand crop and any gt
    max_trials : int
        maximum trials, if exceed this number, give up anyway
    max_sample : int
        maximum random crop samples to be generated
    """
    def __init__(self, min_scale=1., max_scale=1.,
                 min_aspect_ratio=1., max_aspect_ratio=1.,
                 min_overlap=0., max_trials=50, max_sample=1):
        super(RandCropper, self).__init__(max_trials, max_sample)
        assert min_scale <= max_scale, "min_scale must <= max_scale"
        assert 0 < min_scale and min_scale <= 1, "min_scale must in (0, 1]"
        assert 0 < max_scale and max_scale <= 1, "max_scale must in (0, 1]"
        self.min_scale = min_scale
        self.max_scale = max_scale
        assert 0 < min_aspect_ratio and min_aspect_ratio <= 1, "min_ratio must in (0, 1]"
        assert 1 <= max_aspect_ratio , "max_ratio must >= 1"
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        assert 0 <= min_overlap and min_overlap <= 1, "min_overlap must in [0,1]"
        self.min_overlap = min_overlap

        # 'center': overlapped gt centers must lie inside the crop;
        # 'corner': overlapped gt boxes must lie entirely inside the crop
        self.config = {'gt_constraint' : 'center'}

    def sample(self, label):
        """
        generate random cropping boxes according to parameters
        if satisfactory crops generated, apply to ground-truth as well

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths, rows whose first column is <= -1 are padding

        Returns:
        ----------
        list of (crop_box, label) tuples, if failed, return empty list []
        """
        samples = []
        # Every trial works on the ORIGINAL ground truths: the input label is
        # never rebound, so with max_sample > 1 later samples are not derived
        # from an earlier crop's transformed labels.
        valid_mask = np.where(label[:, 0] > -1)[0]
        gt = label[valid_mask, :]
        if gt.shape[0] == 0:
            # without ground truth no crop can keep an object
            return samples
        for _ in range(self.max_trials):
            if len(samples) >= self.max_sample:
                break
            scale = np.random.uniform(self.min_scale, self.max_scale)
            # keep both width and height of the crop within (0, 1]
            min_ratio = max(self.min_aspect_ratio, scale * scale)
            max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
            ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
            width = scale * ratio
            height = scale / ratio
            left = np.random.uniform(0., 1 - width)
            top = np.random.uniform(0., 1 - height)
            rand_box = (left, top, left + width, top + height)
            ious = self._check_satisfy(rand_box, gt)
            if ious is None:
                continue
            # transform gt labels after crop, discard the ones without overlap
            keep = ious > 0
            if not np.any(keep):
                continue
            kept = gt[keep]
            new_width = rand_box[2] - rand_box[0]
            new_height = rand_box[3] - rand_box[1]
            new_gt_boxes = np.column_stack([
                kept[:, 0],
                np.maximum(0., (kept[:, 1] - left) / new_width),
                np.maximum(0., (kept[:, 2] - top) / new_height),
                np.minimum(1., (kept[:, 3] - left) / new_width),
                np.minimum(1., (kept[:, 4] - top) / new_height)])
            samples.append((rand_box, _pad_labels(new_gt_boxes, label.shape[0])))
        return samples

    def _check_satisfy(self, rand_box, gt_boxes):
        """
        check if overlap with any gt box is larger than threshold

        Returns the per-gt IoU array, or None when there is no gt box, the
        best IoU is below min_overlap, or an overlapped gt box violates the
        configured 'gt_constraint'.
        """
        l, t, r, b = rand_box
        gt = np.asarray(gt_boxes, dtype=float)
        num_gt = gt.shape[0]
        if num_gt == 0:
            # np.amax would raise on an empty array
            return None
        # intersection rectangle of the crop with every gt box
        w = np.maximum(np.minimum(r, gt[:, 3]) - np.maximum(l, gt[:, 1]), 0)
        h = np.maximum(np.minimum(b, gt[:, 4]) - np.maximum(t, gt[:, 2]), 0)
        inter_area = h * w
        union_area = np.ones(num_gt) * max(0, r - l) * max(0, b - t)
        union_area += (gt[:, 3] - gt[:, 1]) * (gt[:, 4] - gt[:, 2])
        union_area -= inter_area
        # degenerate unions get IoU 0 without dividing by zero
        ious = np.zeros(num_gt)
        positive = union_area > 0
        ious[positive] = inter_area[positive] / union_area[positive]
        if np.amax(ious) < self.min_overlap:
            return None
        # check ground-truth constraint on every overlapped gt box
        overlapped = ious > 0
        constraint = self.config['gt_constraint']
        if constraint == 'center':
            gt_x = (gt[:, 1] + gt[:, 3]) / 2.0
            gt_y = (gt[:, 2] + gt[:, 4]) / 2.0
            outside = (gt_x < l) | (gt_x > r) | (gt_y < t) | (gt_y > b)
        elif constraint == 'corner':
            outside = (gt[:, 1] < l) | (gt[:, 3] > r) \
                | (gt[:, 2] < t) | (gt[:, 4] > b)
        else:
            outside = np.zeros(num_gt, dtype=bool)
        if np.any(overlapped & outside):
            return None
        return ious


class RandPadder(RandSampler):
    """
    Random padding original images with various settings

    Parameters:
    ----------
    min_scale : float
        minimum padding scale, [1, inf)
    max_scale : float
        maximum padding scale, [1, inf), must larger than min_scale
    min_aspect_ratio : float
        minimum aspect ratio of the padded box, (0, 1]
    max_aspect_ratio : float
        maximum aspect ratio of the padded box, [1, inf)
    min_gt_scale : float
        minimum ground-truth scale to be satisfied after padding,
        either width or height, [0, 1]
    max_trials : int
        maximum trials, if exceed this number, give up anyway
    max_sample : int
        maximum random padding samples to be generated
    """
    def __init__(self, min_scale=1., max_scale=1., min_aspect_ratio=1.,
                 max_aspect_ratio=1., min_gt_scale=.01, max_trials=50,
                 max_sample=1):
        super(RandPadder, self).__init__(max_trials, max_sample)
        assert min_scale <= max_scale, "min_scale must <= max_scale"
        assert min_scale >= 1, "min_scale must in [1, inf)"
        self.min_scale = min_scale
        self.max_scale = max_scale
        assert 0 < min_aspect_ratio and min_aspect_ratio <= 1, "min_ratio must in (0, 1]"
        assert 1 <= max_aspect_ratio , "max_ratio must >= 1"
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        assert 0 <= min_gt_scale and min_gt_scale <= 1, "min_gt_scale must in [0, 1]"
        self.min_gt_scale = min_gt_scale

    def sample(self, label):
        """
        generate random padding boxes according to parameters
        if satisfactory padding generated, apply to ground-truth as well

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths, rows whose first column is <= -1 are padding

        Returns:
        ----------
        list of (crop_box, label) tuples, if failed, return empty list []
        """
        samples = []
        # Every trial works on the ORIGINAL ground truths (see RandCropper.sample)
        valid_mask = np.where(label[:, 0] > -1)[0]
        gt = label[valid_mask, :]
        if gt.shape[0] == 0:
            return samples
        for _ in range(self.max_trials):
            if len(samples) >= self.max_sample:
                break
            scale = np.random.uniform(self.min_scale, self.max_scale)
            # width = scale * ratio and height = scale / ratio must both be
            # >= 1, so the sampled value has to lie in [1/scale^2, scale^2].
            # The cropper's bounds (scale^2, 1/scale^2) are inverted for
            # scale >= 1: they passed low > high to np.random.uniform and
            # ignored the aspect-ratio limits.
            min_ratio = max(self.min_aspect_ratio, 1. / scale / scale)
            max_ratio = min(self.max_aspect_ratio, scale * scale)
            ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
            width = scale * ratio
            if width < 1:
                continue
            height = scale / ratio
            if height < 1:
                continue
            # 1 - width <= 0: pass the bounds in increasing order
            left = np.random.uniform(1 - width, 0.)
            top = np.random.uniform(1 - height, 0.)
            rand_box = (left, top, left + width, top + height)
            xmin = (gt[:, 1] - left) / width
            ymin = (gt[:, 2] - top) / height
            xmax = (gt[:, 3] - left) / width
            ymax = (gt[:, 4] - top) / height
            # reject the whole trial if any gt box becomes too small
            if np.any(np.minimum(xmax - xmin, ymax - ymin) < self.min_gt_scale):
                continue
            new_gt_boxes = np.column_stack([gt[:, 0], xmin, ymin, xmax, ymax])
            samples.append((rand_box, _pad_labels(new_gt_boxes, label.shape[0])))
        return samples
log training loss to tensorboard (as scalar) 17 | 18 | Currently - does not support resume training.. 19 | """ 20 | def __init__(self, dist_logging_dir=None, scalar_logging_dir=None, 21 | logfile_path=None, batch_size=None, iter_monitor=0, 22 | frequent=None, prefix='ssd'): 23 | self.scalar_logging_dir = scalar_logging_dir 24 | self.dist_logging_dir = dist_logging_dir 25 | self.logfile_path = logfile_path 26 | self.batch_size = batch_size 27 | self.iter_monitor = iter_monitor 28 | self.frequent = frequent 29 | self.prefix = prefix 30 | self.batch = 0 31 | self.line_idx = 0 32 | try: 33 | from tensorboard import SummaryWriter 34 | self.dist_summary_writer = SummaryWriter(dist_logging_dir) 35 | self.scalar_summary_writer = SummaryWriter(scalar_logging_dir) 36 | except ImportError: 37 | logging.error('You can install tensorboard via `pip install tensorboard`.') 38 | 39 | def __call__(self, param): 40 | """Callback to parse a log file and and add params to TensorBoard.""" 41 | 42 | # save distributions from the monitor output log 43 | if not self.iter_monitor == 0 and self.batch % self.iter_monitor == 0: 44 | with open(self.logfile_path) as fp: 45 | for i in range(self.line_idx): 46 | fp.next() 47 | for line in fp: 48 | if line.startswith('Batch'): 49 | line = line.split(' ') 50 | line = [x for x in line if x] 51 | layer_name = line[2] 52 | layer_value = np.array(float(line[3].split('\t')[0])).flatten() 53 | if np.isfinite(layer_value): 54 | self.dist_summary_writer.add_histogram(layer_name, layer_value) 55 | self.line_idx += 1 56 | 57 | # save training loss 58 | if self.batch % self.frequent == 0: 59 | if param.eval_metric is None: 60 | return 61 | name_value = param.eval_metric.get_name_value() 62 | for name, value in name_value: 63 | if self.prefix is not None: 64 | name = '%s-%s' % (self.prefix, name) 65 | self.scalar_summary_writer.add_scalar(name, value, global_step=self.batch) 66 | self.batch += 1 67 | 68 | class LogROCCallback(object): 69 | """save roc graphs 
class LogROCCallback(object):
    """Save ROC graphs periodically in TensorBoard.

    Writes a TensorBoard event file holding the ROC graph of every class.
    The graphs are not created here: for each class the image file
    'roc_<class_name>.png' is looked up inside `roc_path`, so this callback
    can only be executed after the code that creates those files.

    Parameters:
    ----------
    logging_dir : str
        where the tensorboard file will be created
    prefix : str
        images are logged under the tag '<prefix>_<class_name>'
    roc_path : str
        directory in which the roc images are looked up
    class_names : list[str]
        list of class names.
    """
    def __init__(self, logging_dir=None, prefix='val', roc_path=None, class_names=None):
        self.prefix = prefix
        self.roc_path = roc_path
        self.class_names = class_names
        # stays None when tensorboard is unavailable; __call__ is then a no-op
        self.summary_writer = None
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log ROC graph as an image in TensorBoard.

        `param` is accepted for the callback protocol but not read.
        """
        if self.summary_writer is None or not self.class_names:
            return
        for class_name in self.class_names:
            roc = os.path.join(self.roc_path, 'roc_' + class_name + '.png')
            if not os.path.exists(roc):
                # graph for this class has not been produced (yet)
                continue
            # NOTE(review): scipy.misc.imread was removed in SciPy 1.2; this
            # call needs an older SciPy or a replacement image reader.
            im = scipy.misc.imread(roc)
            self.summary_writer.add_image(self.prefix + '_' + class_name, im)
class LogDetectionsCallback(object):
    """Log detections and ground-truth boxes as images in TensorBoard.

    For every image of the evaluated batch a figure with the ground-truth
    boxes and the detections above the score threshold is saved to
    '<images_path>/image<i>.png' and then added to the event file.

    Parameters:
    ----------
    logging_dir : str
        where the tensorboard file will be created
    prefix : str
        stored on the instance, not read by this class
    images_path : str
        directory for the rendered figures, created when it does not exist
    class_names : list[str]
        list of class names, indexed by class id
    batch_size : int
        number of images per batch, used to strip the padded samples
    mean_pixels : list or tuple
        three per-channel values that are added back to the network input
    det_thresh : float
        only detections with a higher score are drawn
    """
    def __init__(self, logging_dir=None, prefix='val', images_path=None,
                 class_names=None, batch_size=None, mean_pixels=None, det_thresh=0.5):
        self.logging_dir = logging_dir
        self.prefix = prefix
        # makedirs also creates missing parent directories
        if images_path is not None and not os.path.exists(images_path):
            os.makedirs(images_path)
        self.images_path = images_path
        self.class_names = class_names
        self.batch_size = batch_size
        self.mean_pixels = mean_pixels
        self.det_thresh = det_thresh
        # stays None when tensorboard is unavailable; figures are still saved
        self.summary_writer = None
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log detections and gt-boxes as an image in TensorBoard.

        Parameters:
        ----------
        param : object
            callback argument; `param.locals` must provide 'eval_batch'
            (batch with `pad`, `data` and `label`) and 'self' (object with
            `get_outputs()` and `output_names`)

        Returns:
        ----------
        None when `param.locals` is None, otherwise an empty list
        """
        if param.locals is None:
            return

        result = []
        eval_batch = param.locals['eval_batch']
        module = param.locals['self']
        pad = eval_batch.pad
        # drop the samples that only pad the last batch
        images = eval_batch.data[0][0:self.batch_size - pad].asnumpy()
        labels = eval_batch.label[0][0:self.batch_size - pad].asnumpy()
        outputs = [out[0:out.shape[0] - pad] for out in module.get_outputs()]
        # 'det' variable can be in different positions depending with train/test symbols
        if len(outputs) > 1:
            det_idx = [idx for idx, f in enumerate(module.output_names) if f.startswith('det')][0]
            detections = outputs[det_idx].asnumpy()
        else:
            detections = outputs[0].asnumpy()
        for i in range(detections.shape[0]):
            det = detections[i, :, :]
            det = det[np.where(det[:, 0] >= 0)[0]]
            label = labels[i, :, :]
            label = label[np.where(label[:, 0] >= 0)[0]]
            img = self._to_display_image(images[i, :, :, :])
            plt_path = os.path.join(self.images_path, 'image' + str(i) + '.png')
            self._visualize_detection_and_labels(img, det, label=label,
                                                 classes=self.class_names,
                                                 thresh=self.det_thresh,
                                                 plt_path=plt_path)
            # save to tensorboard
            if self.summary_writer is not None:
                # NOTE(review): scipy.misc.imread was removed in SciPy 1.2
                img_det_graph = scipy.misc.imread(plt_path)
                self.summary_writer.add_image('image' + str(i) + '.png', img_det_graph)
        return result

    def _to_display_image(self, chw_img):
        """Turn a mean-subtracted (3, H, W) array into an (H, W, 3) uint8 image."""
        img = chw_img + np.reshape(self.mean_pixels, (3, 1, 1))
        # clip before the cast, otherwise out-of-range values wrap around
        img = np.clip(img, 0, 255).astype(np.uint8)
        img = img.transpose([1, 2, 0])
        # reverse the channel order, as the original code did
        # NOTE(review): confirm the resulting order is what imshow should get
        return img[:, :, ::-1]

    def _visualize_detection_and_labels(self, img, dets, label, classes=None, thresh=None, plt_path=None):
        """
        visualize detections and ground-truth boxes in one image

        Parameters:
        ----------
        img : numpy.array
            image, (height, width, channels)
        dets : numpy.array
            ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...])
            each row is one object, coordinates are relative to the image size
        label : numpy.array
            ground-truth, numpy.array([[id, x1, y1, x2, y2]...])
        classes : tuple or list of str
            class names
        thresh : float
            score threshold, None draws every detection
        plt_path : str
            where the figure is saved
        """
        fig = plt.figure()
        try:
            plt.imshow(img)
            height = img.shape[0]
            width = img.shape[1]
            axes = plt.gca()
            colors = dict()
            # Visualize ground-truth boxes
            gt_color = (1.0, 0.0, 0.0)
            for i in range(label.shape[0]):
                cls_id = int(label[i, 0])
                if cls_id < 0:
                    continue
                xmin = int(label[i, 1] * width)
                ymin = int(label[i, 2] * height)
                xmax = int(label[i, 3] * width)
                ymax = int(label[i, 4] * height)
                axes.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin,
                                             ymax - ymin, fill=False,
                                             edgecolor=gt_color,
                                             linewidth=2))
                axes.text(xmin, ymin - 2,
                          'gt',
                          bbox=dict(facecolor=gt_color, alpha=0.5),
                          fontsize=8, color='white')
            # visualize predictions
            for i in range(dets.shape[0]):
                cls_id = int(dets[i, 0])
                if cls_id < 0:
                    continue
                score = dets[i, 1]
                if thresh is not None and not score > thresh:
                    continue
                if cls_id not in colors:
                    # one random color per class, reused for all its boxes
                    colors[cls_id] = (random.random(), random.random(), random.random())
                xmin = int(dets[i, 2] * width)
                ymin = int(dets[i, 3] * height)
                xmax = int(dets[i, 4] * width)
                ymax = int(dets[i, 5] * height)
                axes.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin,
                                             ymax - ymin, fill=False,
                                             edgecolor=colors[cls_id],
                                             linewidth=3.5))
                class_name = str(cls_id)
                if classes and len(classes) > cls_id:
                    class_name = classes[cls_id]
                axes.text(xmin, ymin - 2,
                          '{:s} {:.3f}'.format(class_name, score),
                          bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                          fontsize=8, color='white')
            plt.savefig(plt_path)
        finally:
            # release the figure even when drawing or saving fails
            plt.close(fig)
class LogDistributionsCallback(object):
    """
    This function has been deprecated because it consumes too much time.
    The faster way is to use "ParseLogCallback" with a 'iter_monitor' flag

    Log the distributions of the network parameters periodically in
    TensorBoard. This callback works almost same as `callback.Speedometer`,
    but writes a TensorBoard event file for visualization.

    Parameters:
    ----------
    logging_dir : str
        where the tensorboard file will be created
    prefix : str or None
        when not None, histograms are named '<prefix>-<parameter name>'
    layers_list : list[str]
        list of layers to be tracked; currently it only switches logging on
        (None logs nothing, any other value logs every parameter)
    """
    def __init__(self, logging_dir, prefix=None, layers_list=None):
        self.prefix = prefix
        self.layers_list = layers_list
        # stays None when tensorboard is unavailable; __call__ is then a no-op
        self.summary_writer = None
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log layers' distributions in TensorBoard.

        Parameters:
        ----------
        param : object
            callback argument; `param.locals['arg_params']` maps parameter
            names to arrays exposing `asnumpy()`
        """
        if param.locals is None:
            return
        # TODO - implement layer to choose from..
        if self.layers_list is None or self.summary_writer is None:
            return
        # items() works on both Python 2 and 3, iteritems() only on Python 2
        for name, value in param.locals['arg_params'].items():
            if self.prefix is not None:
                name = '%s-%s' % (self.prefix, name)
            self.summary_writer.add_histogram(name, value.asnumpy().flatten())
class DetRecordIter(mx.io.DataIter):
    """
    The new detection iterator wrapper for mx.io.ImageDetRecordIter which is
    written in C++, it takes record file as input and runs faster.
    Supports various augment operations for object detection.

    Parameters:
    -----------
    path_imgrec : str
        path to the record file
    path_imglist : str
        path to the list file to replace the labels in record
    batch_size : int
        batch size
    data_shape : tuple
        (3, height, width)
    label_width : int
        specify the label width, use -1 for variable length
    label_pad_width : int
        labels must have same shape in batches, use -1 for automatic estimation
        in each record, otherwise force padding to width in case you want
        train/validation to match the same width
    label_pad_value : float
        label padding value
    resize_mode : str
        force - resize to data_shape regardless of aspect ratio
        fit - try fit to data_shape preserving aspect ratio
        shrink - shrink to data_shape only, preserving aspect ratio
    mean_pixels : list or tuple
        mean values for red/green/blue
    kwargs : dict
        see mx.io.ImageDetRecordIter

    Raises:
    ----------
    RuntimeError
        when no batch can be read from the record file
    """
    def __init__(self, path_imgrec, batch_size, data_shape, path_imglist="",
                 label_width=-1, label_pad_width=-1, label_pad_value=-1,
                 resize_mode='force', mean_pixels=(123.68, 116.779, 103.939),
                 **kwargs):
        super(DetRecordIter, self).__init__()
        self.rec = mx.io.ImageDetRecordIter(
            path_imgrec=path_imgrec,
            path_imglist=path_imglist,
            label_width=label_width,
            label_pad_width=label_pad_width,
            label_pad_value=label_pad_value,
            batch_size=batch_size,
            data_shape=data_shape,
            mean_r=mean_pixels[0],
            mean_g=mean_pixels[1],
            mean_b=mean_pixels[2],
            resize_mode=resize_mode,
            **kwargs)

        # read one batch up front: the label layout is only known from data
        self.provide_label = None
        self._get_batch()
        if not self.provide_label:
            raise RuntimeError("Invalid ImageDetRecordIter: " + path_imgrec)
        self.reset()

    @property
    def provide_data(self):
        """Data description, delegated to the wrapped record iterator."""
        return self.rec.provide_data

    def reset(self):
        """Rewind the wrapped record iterator."""
        self.rec.reset()

    def iter_next(self):
        """Load the next batch, return False when the iterator is exhausted."""
        return self._get_batch()

    def next(self):
        """Return the next batch with reshaped labels or raise StopIteration."""
        if self.iter_next():
            return self._batch
        else:
            raise StopIteration

    def _get_batch(self):
        """Fetch a batch from the record iterator and reshape its labels.

        Returns True when a batch is available in `self._batch`, else False.
        """
        try:
            self._batch = self.rec.next()
        except StopIteration:
            # exhaustion is reported as False so that iter_next() honours its
            # contract and __init__ can raise its RuntimeError on empty input
            self._batch = None
            return False
        if not self._batch:
            return False

        if self.provide_label is None:
            # estimate the label shape for the first batch, always reshape to n*5
            first_label = self._batch.label[0][0].asnumpy()
            self.batch_size = self._batch.label[0].shape[0]
            # entries 4 and 5 of the raw label hold the header width and the
            # width of one object record
            self.label_header_width = int(first_label[4])
            self.label_object_width = int(first_label[5])
            assert self.label_object_width >= 5, "object width must >=5"
            self.label_start = 4 + self.label_header_width
            self.max_objects = (first_label.size - self.label_start) // self.label_object_width
            self.label_shape = (self.batch_size, self.max_objects, self.label_object_width)
            self.label_end = self.label_start + self.max_objects * self.label_object_width
            self.provide_label = [('label', self.label_shape)]

        # modify label: cut the header and fold the flat vector into
        # (batch, object, object_width)
        label = self._batch.label[0].asnumpy()
        label = label[:, self.label_start:self.label_end].reshape(
            (self.batch_size, self.max_objects, self.label_object_width))
        self._batch.label = [mx.nd.array(label)]
        return True
class DetIter(mx.io.DataIter):
    """
    Detection Iterator, which will feed data and label to network
    Optional data augmentation is performed when providing batch

    Parameters:
    ----------
    imdb : Imdb
        image database
    batch_size : int
        batch size
    data_shape : int or (int, int)
        image shape to be resized
    mean_pixels : float list
        [R, G, B], mean pixel values
    rand_samplers : list
        random cropping sampler list, if not specified, will
        use original image only
    rand_mirror : bool
        whether to randomly mirror input images, default False
    shuffle : bool
        whether to shuffle initial image list, default False
    rand_seed : int or None
        fixed seed for numpy's global random generator, default None (no seeding)
    max_crop_trial : int
        if random crop is enabled, defines the maximum trial time
        if trial exceed this number, will give up cropping
        (stored on the instance, not read by this class)
    is_train : bool
        whether in training phase, default True, if False, labels might
        be ignored
    """
    def __init__(self, imdb, batch_size, data_shape,
                 mean_pixels=(128, 128, 128), rand_samplers=None,
                 rand_mirror=False, shuffle=False, rand_seed=None,
                 is_train=True, max_crop_trial=50):
        super(DetIter, self).__init__()

        self._imdb = imdb
        self.batch_size = batch_size
        if isinstance(data_shape, int):
            data_shape = (data_shape, data_shape)
        self._data_shape = data_shape
        self._mean_pixels = mx.nd.array(mean_pixels).reshape((3, 1, 1))
        if not rand_samplers:
            self._rand_samplers = []
        else:
            if not isinstance(rand_samplers, list):
                rand_samplers = [rand_samplers]
            assert all(isinstance(rs, RandSampler) for rs in rand_samplers), \
                "Invalid rand sampler"
            self._rand_samplers = rand_samplers
        self.is_train = is_train
        self._rand_mirror = rand_mirror
        self._shuffle = shuffle
        if rand_seed is not None:
            # `is not None` so that a seed of 0 is honoured as well
            np.random.seed(rand_seed)  # fix random seed
        self._max_crop_trial = max_crop_trial

        self._current = 0
        self._size = imdb.num_images
        self._index = np.arange(self._size)

        # load one batch up front so that provide_data/provide_label
        # can report shapes
        self._data = None
        self._label = None
        self._get_batch()

    @property
    def provide_data(self):
        """List of (name, shape) for the data of the current batch."""
        return [(k, v.shape) for k, v in self._data.items()]

    @property
    def provide_label(self):
        """List of (name, shape) for the labels, empty when not training."""
        if self.is_train:
            return [(k, v.shape) for k, v in self._label.items()]
        else:
            return []

    def reset(self):
        """Restart from the first image, reshuffling when enabled."""
        self._current = 0
        if self._shuffle:
            np.random.shuffle(self._index)

    def iter_next(self):
        """Whether there are images left in the current epoch."""
        return self._current < self._size

    def next(self):
        """Load and return the next mx.io.DataBatch or raise StopIteration."""
        if self.iter_next():
            self._get_batch()
            data_batch = mx.io.DataBatch(data=list(self._data.values()),
                                         label=list(self._label.values()),
                                         pad=self.getpad(), index=self.getindex())
            self._current += self.batch_size
            return data_batch
        else:
            raise StopIteration

    def getindex(self):
        """Index of the current batch within the epoch."""
        return self._current // self.batch_size

    def getpad(self):
        """Number of samples of the current batch that lie past the dataset end."""
        pad = self._current + self.batch_size - self._size
        return 0 if pad < 0 else pad

    def _get_batch(self):
        """
        Load data/label from dataset
        """
        batch_data = mx.nd.zeros((self.batch_size, 3, self._data_shape[0], self._data_shape[1]))
        batch_label = []
        for i in range(self.batch_size):
            position = self._current + i
            if position >= self._size:
                if not self.is_train:
                    # leave the padded slots as zeros when not training
                    continue
                # use padding from middle in each epoch
                idx = (position + self._size // 2) % self._size
                index = self._index[idx]
            else:
                index = self._index[position]
            im_path = self._imdb.image_path_from_index(index)
            with open(im_path, 'rb') as fp:
                img_content = fp.read()
            img = mx.img.imdecode(img_content)
            # copy, because augmentation modifies the label in place
            gt = self._imdb.label_from_index(index).copy() if self.is_train else None
            data, label = self._data_augmentation(img, gt)
            batch_data[i] = data
            if self.is_train:
                batch_label.append(label)
        self._data = {'data': batch_data}
        if self.is_train:
            self._label = {'label': mx.nd.array(np.array(batch_label))}
        else:
            self._label = {'label': None}

    def _data_augmentation(self, data, label):
        """
        perform data augmentations: crop, mirror, resize, sub mean, swap channels...

        Parameters:
        ----------
        data : decoded image, indexed (height, width, channel)
        label : numpy.array or None
            rows of [id, xmin, ymin, xmax, ymax], rows with id -1 are padding

        Returns:
        ----------
        (data, label), data transposed to (channel, height, width) as float32
        with the mean pixels subtracted
        """
        if self.is_train and self._rand_samplers:
            rand_crops = []
            for rs in self._rand_samplers:
                rand_crops += rs.sample(label)
            num_rand_crops = len(rand_crops)
            # randomly pick up one as input data
            if num_rand_crops > 0:
                index = int(np.random.uniform(0, 1) * num_rand_crops)
                width = data.shape[1]
                height = data.shape[0]
                crop = rand_crops[index][0]
                xmin = int(crop[0] * width)
                ymin = int(crop[1] * height)
                xmax = int(crop[2] * width)
                ymax = int(crop[3] * height)
                if xmin >= 0 and ymin >= 0 and xmax <= width and ymax <= height:
                    data = mx.img.fixed_crop(data, xmin, ymin, xmax - xmin, ymax - ymin)
                else:
                    # padding mode: paste the image onto a gray canvas
                    # NOTE(review): presumably the sampled box always encloses
                    # the whole image here - verify against the samplers
                    new_width = xmax - xmin
                    new_height = ymax - ymin
                    offset_x = 0 - xmin
                    offset_y = 0 - ymin
                    data_bak = data
                    data = mx.nd.full((new_height, new_width, 3), 128, dtype='uint8')
                    data[offset_y:offset_y + height, offset_x:offset_x + width, :] = data_bak
                label = rand_crops[index][1]
        if self.is_train:
            interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                              cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
        else:
            interp_methods = [cv2.INTER_LINEAR]
        interp_method = interp_methods[int(np.random.uniform(0, 1) * len(interp_methods))]
        data = mx.img.imresize(data, self._data_shape[1], self._data_shape[0], interp_method)
        if self.is_train and self._rand_mirror:
            if np.random.uniform(0, 1) > 0.5:
                data = mx.nd.flip(data, axis=1)
                # mirror the boxes too: new xmin = 1 - xmax, new xmax = 1 - xmin
                valid_mask = np.where(label[:, 0] > -1)[0]
                tmp = 1.0 - label[valid_mask, 1]
                label[valid_mask, 1] = 1.0 - label[valid_mask, 3]
                label[valid_mask, 3] = tmp
        data = mx.nd.transpose(data, (2, 0, 1))
        data = data.astype('float32')
        data = data - self._mean_pixels
        return data, label