├── utils
│   ├── __init__.py
│   ├── core.py
│   └── cmc.py
├── .gitmodules
├── models
│   ├── fc_only
│   │   ├── 3dpes_solver.prototxt
│   │   ├── ilids_solver.prototxt
│   │   ├── cuhk01_solver.prototxt
│   │   ├── prid_solver.prototxt
│   │   ├── viper_solver.prototxt
│   │   ├── cuhk03_solver.prototxt
│   │   ├── shinpuhkan_solver.prototxt
│   │   ├── 3dpes_trainval.prototxt
│   │   ├── ilids_trainval.prototxt
│   │   └── prid_trainval.prototxt
│   ├── jstl
│   │   ├── 3dpes_solver.prototxt
│   │   ├── ilids_solver.prototxt
│   │   ├── prid_solver.prototxt
│   │   ├── viper_solver.prototxt
│   │   ├── cuhk01_solver.prototxt
│   │   ├── cuhk03_solver.prototxt
│   │   ├── jstl_solver.prototxt
│   │   └── jstl_deploy_inference.prototxt
│   ├── dgd
│   │   ├── 3dpes_solver.prototxt
│   │   ├── cuhk01_solver.prototxt
│   │   ├── cuhk03_solver.prototxt
│   │   ├── ilids_solver.prototxt
│   │   ├── jstl_solver.prototxt
│   │   ├── prid_solver.prototxt
│   │   └── viper_solver.prototxt
│   └── individually
│       ├── 3dpes_solver.prototxt
│       ├── ilids_solver.prototxt
│       ├── prid_solver.prototxt
│       ├── viper_solver.prototxt
│       ├── cuhk01_solver.prototxt
│       └── cuhk03_solver.prototxt
├── scripts
│   ├── exp_individually.sh
│   ├── extract_features.sh
│   ├── exp_jstl.sh
│   ├── merge_dbs.sh
│   ├── make_dbs.sh
│   ├── format_rawdata.sh
│   ├── exp_ft_dgd.sh
│   ├── exp_dgd.sh
│   └── routines.sh
├── .gitignore
├── tools
│   ├── convert_lmdb_to_numpy.py
│   ├── save_individual_impact_score.py
│   ├── merge_lists_single_task.py
│   ├── save_joint_impact_score.py
│   ├── compute_impact_score.py
│   └── make_lists_id_training.py
├── data
│   ├── format_cuhk01.py
│   ├── format_shinpuhkan.py
│   ├── format_viper.py
│   ├── format_ilids.py
│   ├── format_3dpes.py
│   ├── format_cuhk03.py
│   └── format_prid.py
├── eval
│   └── metric_learning.py
└── README.md
/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | from .cmc import * 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/caffe"] 2 | path = external/caffe 3 | url = https://github.com/Cysu/caffe 4 | branch = domain-guided-dropout 5 | -------------------------------------------------------------------------------- /models/fc_only/3dpes_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/3dpes_trainval.prototxt" 2 | test_iter: 6 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 200 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/3dpes" 18 | solver_mode: GPU -------------------------------------------------------------------------------- /models/fc_only/ilids_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/ilids_trainval.prototxt" 2 | test_iter: 3 3 | test_interval: 20 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 200 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/ilids" 18 | solver_mode: GPU -------------------------------------------------------------------------------- /models/fc_only/cuhk01_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/cuhk01_trainval.prototxt" 2 | test_iter: 20 3 | test_interval: 100 4 | test_initialization: true 5 | 
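# Added note (one assumption: the validation batch size is defined in
# cuhk01_trainval.prototxt, which is not shown here). Caffe runs test_iter
# forward batches per test pass, so test_iter * batch_size should cover the
# validation set roughly once.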
6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 500 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/cuhk01" 18 | solver_mode: GPU -------------------------------------------------------------------------------- /models/fc_only/prid_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/prid_trainval.prototxt" 2 | test_iter: 38 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 300 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/prid" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /models/fc_only/viper_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/viper_trainval.prototxt" 2 | test_iter: 7 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 200 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/viper" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /models/fc_only/cuhk03_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/cuhk03_trainval.prototxt" 2 | test_iter: 263 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 2000 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/cuhk03" 18 | solver_mode: GPU -------------------------------------------------------------------------------- /models/jstl/3dpes_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/3dpes_trainval.prototxt" 2 | test_iter: 6 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 500 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/3dpes" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/jstl/ilids_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/ilids_trainval.prototxt" 2 | test_iter: 3 3 | test_interval: 20 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 200 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/ilids" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/jstl/prid_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/prid_trainval.prototxt" 2 | test_iter: 38 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 
2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 3000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/prid" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/jstl/viper_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/viper_trainval.prototxt" 2 | test_iter: 7 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 500 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/viper" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/dgd/3dpes_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/3dpes_trainval.prototxt" 2 | test_iter: 6 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 500 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/3dpes" 19 | solver_mode: GPU 20 | -------------------------------------------------------------------------------- /models/dgd/cuhk01_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/cuhk01_trainval.prototxt" 2 | test_iter: 20 3 | test_interval: 100 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 2000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/cuhk01" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/dgd/cuhk03_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/cuhk03_trainval.prototxt" 2 | test_iter: 263 3 | test_interval: 500 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 5000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/cuhk03" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/dgd/ilids_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/ilids_trainval.prototxt" 2 | test_iter: 3 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 200 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/ilids" 19 | solver_mode: GPU 20 | -------------------------------------------------------------------------------- /models/dgd/jstl_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/jstl_trainval.prototxt" 2 | test_iter: 559 3 | test_interval: 500 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | 
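# The "poly" policy below decays the learning rate as
# base_lr * (1 - iter / max_iter) ^ power, reaching zero at max_iter.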
lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 20000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/jstl" 19 | solver_mode: GPU 20 | -------------------------------------------------------------------------------- /models/dgd/prid_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/prid_trainval.prototxt" 2 | test_iter: 38 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 1000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/prid" 19 | solver_mode: GPU 20 | -------------------------------------------------------------------------------- /models/dgd/viper_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/dgd/viper_trainval.prototxt" 2 | test_iter: 7 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 1000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/dgd/viper" 19 | solver_mode: GPU 20 | -------------------------------------------------------------------------------- /models/jstl/cuhk01_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/cuhk01_trainval.prototxt" 2 | test_iter: 20 3 | test_interval: 100 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 2000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/cuhk01" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/jstl/cuhk03_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/cuhk03_trainval.prototxt" 2 | test_iter: 263 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "poly" 11 | base_lr: 0.01 12 | power: 0.5 13 | max_iter: 10000 14 | 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | 18 | snapshot_prefix: "external/exp/snapshots/jstl/cuhk03" 19 | solver_mode: GPU -------------------------------------------------------------------------------- /models/fc_only/shinpuhkan_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/fc_only/shinpuhkan_trainval.prototxt" 2 | test_iter: 225 3 | test_interval: 500 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 9 | lr_policy: "poly" 10 | base_lr: 0.01 11 | power: 0.5 12 | max_iter: 2000 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot_prefix: "external/exp/snapshots/fc_only/shinpuhkan" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /models/jstl/jstl_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/jstl/jstl_trainval.prototxt" 2 | test_iter: 559 3 | test_interval: 500 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | 
iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 2000 14 | max_iter: 55000 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot: 10000 21 | snapshot_prefix: "external/exp/snapshots/jstl/jstl" 22 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/3dpes_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/3dpes_trainval.prototxt" 2 | test_iter: 6 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 100 14 | max_iter: 2700 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot_prefix: "external/exp/snapshots/individually/3dpes" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/ilids_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/ilids_trainval.prototxt" 2 | test_iter: 3 3 | test_interval: 20 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 100 14 | max_iter: 2700 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot_prefix: "external/exp/snapshots/individually/ilids" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/prid_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/prid_trainval.prototxt" 2 | test_iter: 38 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 400 14 | max_iter: 11000 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot_prefix: "external/exp/snapshots/individually/prid" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/viper_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/viper_trainval.prototxt" 2 | test_iter: 7 3 | test_interval: 50 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 100 14 | max_iter: 2700 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot_prefix: "external/exp/snapshots/individually/viper" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/cuhk01_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/cuhk01_trainval.prototxt" 2 | test_iter: 20 3 | test_interval: 100 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "stepdecr" 11 | base_lr: 0.1 12 | gamma: 0.04 13 | stepsize: 200 14 | max_iter: 5500 15 | min_lr: 0.0005 16 | 17 | momentum: 0.9 18 | weight_decay: 0.0005 19 | 20 | snapshot_prefix: 
"external/exp/snapshots/individually/cuhk01" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /models/individually/cuhk03_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/individually/cuhk03_trainval.prototxt" 2 | test_iter: 263 3 | test_interval: 200 4 | test_initialization: true 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "step" 11 | base_lr: 0.1 12 | gamma: 0.1 13 | stepsize: 20000 14 | max_iter: 50000 15 | 16 | momentum: 0.9 17 | weight_decay: 0.0005 18 | 19 | snapshot: 10000 20 | snapshot_prefix: "external/exp/snapshots/individually/cuhk03" 21 | solver_mode: GPU -------------------------------------------------------------------------------- /scripts/exp_individually.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Experiments of training and testing a model individually on each dataset 3 | 4 | # Change to the project root directory. Assume this file is at scripts/. 5 | cd $(dirname ${BASH_SOURCE[0]})/../ 6 | 7 | source scripts/routines.sh 8 | 9 | # Parse arguments. 10 | if [[ $# -ne 1 ]]; then 11 | echo "Usage: $(basename $0) dataset" 12 | echo " dataset Dataset name (3dpes, cuhk01, cuhk03, ilids, prid, viper)" 13 | exit 14 | fi 15 | 16 | exp='individually' 17 | dataset=$1 18 | 19 | train_model ${exp} ${dataset} 20 | test_model ${exp} ${dataset} $(get_trained_model ${exp} ${dataset}) -------------------------------------------------------------------------------- /scripts/extract_features.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Extract features. The batch size is assumed to be 100. 3 | # Use GPU 0 by default. 4 | 5 | # Change to the project root directory. Assume this file is at scripts/. 6 | cd $(dirname ${BASH_SOURCE[0]})/../ 7 | 8 | source scripts/routines.sh 9 | 10 | if [[ $# -ne 3 ]] && [[ $# -ne 4 ]]; then 11 | echo "Usage: $(basename $0) exp dataset weights [blob=fc7_bn]" 12 | echo " exp Subfolder name to store the extracted features" 13 | echo " dataset Dataset name (3dpes, cuhk01, cuhk03, ilids, prid, viper)" 14 | echo " weights Trained caffemodel" 15 | echo " blob Features blob name (fc7_bn by default)" 16 | exit 17 | fi 18 | 19 | extract_features "$@" 20 | -------------------------------------------------------------------------------- /scripts/exp_jstl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Experiments of joint single task learning (JSTL) 3 | 4 | # Change to the project root directory. Assume this file is at scripts/. 
5 | cd $(dirname ${BASH_SOURCE[0]})/../ 6 | 7 | source scripts/routines.sh 8 | 9 | exp='jstl' 10 | 11 | # Train JSTL model 12 | train_model ${exp} jstl 13 | trained_model=$(get_trained_model ${exp} jstl) 14 | 15 | # Extract features on all datasets 16 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 17 | extract_features ${exp} ${dataset} ${trained_model} 18 | done 19 | 20 | # Evaluate performance 21 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 22 | result_dir=$(get_result_dir ${exp} ${dataset} ${trained_model}) 23 | echo ${dataset} 24 | python2 eval/metric_learning.py ${result_dir} 25 | echo 26 | done 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## General 2 | 3 | # Compiled Object files 4 | *.slo 5 | *.lo 6 | *.o 7 | *.cuo 8 | 9 | # Compiled Dynamic libraries 10 | *.so 11 | *.dylib 12 | 13 | # Compiled Static libraries 14 | *.lai 15 | *.la 16 | *.a 17 | 18 | # Compiled protocol buffers 19 | *.pb.h 20 | *.pb.cc 21 | *_pb2.py 22 | 23 | # Compiled python 24 | *.pyc 25 | 26 | # Compiled MATLAB 27 | *.mex* 28 | 29 | # IPython notebook checkpoints 30 | .ipynb_checkpoints 31 | 32 | # CMake generated files 33 | *.gen.cmake 34 | 35 | # Editor temporaries 36 | *.swp 37 | *~ 38 | 39 | # Sublime Text settings 40 | *.sublime-workspace 41 | *.sublime-project 42 | 43 | # Eclipse Project settings 44 | *.*project 45 | .settings 46 | 47 | # OSX dir files 48 | .DS_Store 49 | 50 | ## Project specific 51 | *.log 52 | *.png 53 | *.jpg 54 | *.mat 55 | *.tgz 56 | *.zip 57 | *.tar.gz 58 | *.caffemodel 59 | *.solverstate 60 | -------------------------------------------------------------------------------- /scripts/merge_dbs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | RAW=external/raw 4 | EXP=external/exp 5 | CAFFE=external/caffe 6 | DATASETS=$EXP/datasets 7 | DB=$EXP/db 8 | 9 | cd $(dirname ${BASH_SOURCE[0]})/../ 10 | 11 | python2 tools/merge_lists_single_task.py \ 12 | --dataset-dirs $DATASETS/3dpes $DATASETS/cuhk01 $DATASETS/cuhk03 \ 13 | $DATASETS/ilids $DATASETS/prid $DATASETS/viper \ 14 | $DATASETS/shinpuhkan \ 15 | --db-dirs $DB/3dpes $DB/cuhk01 $DB/cuhk03 \ 16 | $DB/ilids $DB/prid $DB/viper \ 17 | $DB/shinpuhkan \ 18 | -- $DB/jstl 19 | 20 | DB=$DB/jstl 21 | echo "Making training set" 22 | $CAFFE/build/tools/convert_imageset \ 23 | $(pwd)/ $DB/train.txt $DB/train_lmdb \ 24 | -resize_height 160 -resize_width 64 25 | 26 | echo "Making validation set" 27 | $CAFFE/build/tools/convert_imageset \ 28 | $(pwd)/ $DB/val.txt $DB/val_lmdb \ 29 | -resize_height 160 -resize_width 64 30 | 31 | echo "Computing images mean" 32 | $CAFFE/build/tools/compute_image_mean $DB/train_lmdb $DB/mean.binaryproto 33 | -------------------------------------------------------------------------------- /scripts/make_dbs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | RAW=external/raw 4 | EXP=external/exp 5 | CAFFE=external/caffe 6 | 7 | cd $(dirname ${BASH_SOURCE[0]})/../ 8 | 9 | make_db () { 10 | ROOT_DIR=$1 11 | DB_DIR=$2 12 | if [[ $# -eq 4 ]]; then 13 | RESIZE_HEIGHT=$3 14 | RESIZE_WIDTH=$4 15 | else 16 | RESIZE_HEIGHT=160 17 | RESIZE_WIDTH=64 18 | fi 19 | 20 | for subset in train val test_probe test_gallery; do 21 | if [[ ! 
-f ${DB_DIR}/${subset}.txt ]]; then 22 | continue 23 | fi 24 | echo "Making ${subset} set" 25 | $CAFFE/build/tools/convert_imageset \ 26 | ${ROOT_DIR}/ ${DB_DIR}/${subset}.txt ${DB_DIR}/${subset}_lmdb \ 27 | -resize_height ${RESIZE_HEIGHT} -resize_width ${RESIZE_WIDTH} 28 | done 29 | 30 | echo "Computing images mean" 31 | $CAFFE/build/tools/compute_image_mean \ 32 | ${DB_DIR}/train_lmdb ${DB_DIR}/mean.binaryproto 33 | } 34 | 35 | for d in cuhk03 cuhk01 prid viper 3dpes ilids shinpuhkan; do 36 | echo "Making $d" 37 | python2 tools/make_lists_id_training.py $EXP/datasets/$d $EXP/db/$d 38 | make_db $EXP/datasets/$d $EXP/db/$d 39 | done 40 | -------------------------------------------------------------------------------- /tools/convert_lmdb_to_numpy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | from argparse import ArgumentParser 4 | 5 | import lmdb 6 | import numpy as np 7 | 8 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 9 | if osp.join(root, 'external/caffe/python') not in sys.path: 10 | sys.path.insert(0, osp.join(root, 'external/caffe/python')) 11 | from caffe.proto.caffe_pb2 import Datum 12 | 13 | 14 | def main(args): 15 | datum = Datum() 16 | data = [] 17 | env = lmdb.open(args.input_lmdb) 18 | with env.begin() as txn: 19 | cursor = txn.cursor() 20 | for i, (key, value) in enumerate(cursor): 21 | if i >= args.truncate: break 22 | datum.ParseFromString(value) 23 | data.append(datum.float_data) 24 | data = np.squeeze(np.asarray(data)) 25 | np.save(args.output_npy, data) 26 | 27 | 28 | if __name__ == '__main__': 29 | parser = ArgumentParser() 30 | parser.add_argument('input_lmdb') 31 | parser.add_argument('output_npy') 32 | parser.add_argument('--truncate', type=int, default=np.inf, 33 | help="Stop converting after this many items. " 34 | "All the items will be converted if not specified.") 35 | args = parser.parse_args() 36 | main(args) -------------------------------------------------------------------------------- /tools/save_individual_impact_score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | 6 | import lmdb 7 | import numpy as np 8 | 9 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 10 | if osp.join(root, 'external/caffe/python') not in sys.path: 11 | sys.path.insert(0, osp.join(root, 'external/caffe/python')) 12 | import caffe 13 | from caffe.proto.caffe_pb2 import Datum 14 | 15 | 16 | def main(args): 17 | impact = np.load(args.input_npy) 18 | assert impact.ndim == 1, "The impact score should be a vector." 
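# Added note (an assumption, not stated in this file): the single-entry lmdb
# written below is read by the domain-guided-dropout Caffe branch, which uses
# each neuron's impact score to set its dropout rate.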
19 | # Create a datum and copy the impact values along the channel 20 | datum = Datum() 21 | datum.channels = len(impact) 22 | datum.height = 1 23 | datum.width = 1 24 | del datum.float_data[:] 25 | datum.float_data.extend(list(impact)) 26 | # Put into lmdb 27 | if osp.isdir(args.output_lmdb): shutil.rmtree(args.output_lmdb) 28 | with lmdb.open(args.output_lmdb, map_size=1099511627776) as db: 29 | with db.begin(write=True) as txn: 30 | txn.put('impact', datum.SerializeToString()) 31 | 32 | 33 | if __name__ == '__main__': 34 | parser = ArgumentParser( 35 | description="Save neuron impact scores for an individual domain") 36 | parser.add_argument('input_npy') 37 | parser.add_argument('output_lmdb') 38 | args = parser.parse_args() 39 | main(args) -------------------------------------------------------------------------------- /scripts/format_rawdata.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | RAW=external/raw 6 | EXP=external/exp 7 | 8 | echo "Formatting CUHK03 ..." 9 | unzip -q -d $RAW/ $RAW/cuhk03_release.zip 10 | # Save the matfile in the v7 format to speed up loading 11 | cd $RAW/cuhk03_release 12 | matlab -nodisplay -nojvm -nosplash -r "load('cuhk-03.mat'); save('cuhk-03.mat', 'detected', 'labeled', 'testsets', '-v7'); exit;" 13 | cd - 14 | python2 data/format_cuhk03.py $RAW/cuhk03_release $EXP/datasets/cuhk03 15 | 16 | echo "Formatting CUHK01 ..." 17 | unzip -q -d $RAW/cuhk01/ $RAW/CUHK01.zip 18 | python2 data/format_cuhk01.py $RAW/cuhk01/campus $EXP/datasets/cuhk01 19 | 20 | echo "Formatting PRID ..." 21 | unzip -q -d $RAW/prid/ $RAW/prid_2011.zip 22 | python2 data/format_prid.py $RAW/prid $EXP/datasets/prid 23 | 24 | echo "Formatting VIPeR ..." 25 | unzip -q -d $RAW/ $RAW/VIPeR.v1.0.zip 26 | python2 data/format_viper.py $RAW/VIPeR $EXP/datasets/viper 27 | 28 | echo "Formatting 3DPeS ..." 29 | unzip -q -d $RAW/ $RAW/3DPeS_ReId_Snap.zip 30 | python2 data/format_3dpes.py $RAW/3DPeS $EXP/datasets/3dpes 31 | 32 | echo "Formatting i-LIDS ..." 33 | tar xf $RAW/i-LIDS.tar.gz -C $RAW/ 34 | python2 data/format_ilids.py $RAW/i-LIDS $EXP/datasets/ilids 35 | 36 | echo "Formatting Shinpuhkan ..." 37 | unzip -q -d $RAW/ $RAW/Shinpuhkan2014dataset.zip 38 | python2 data/format_shinpuhkan.py $RAW/Shinpuhkan2014dataset $EXP/datasets/shinpuhkan 39 | -------------------------------------------------------------------------------- /scripts/exp_ft_dgd.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Experiments for fine-tuning on each dataset from the JSTL+DGD model. 3 | 4 | # Change to the project root directory. Assume this file is at scripts/. 
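# Sketch of the flow implemented below: build an inference model from the
# JSTL+DGD weights, compute per-dataset neuron impact scores with it,
# fine-tune one model per dataset, then extract features and evaluate CMC.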
5 | cd $(dirname ${BASH_SOURCE[0]})/../ 6 | 7 | source scripts/routines.sh 8 | 9 | exp='dgd' 10 | 11 | # Make a model for inference (treat BN as a fixed affine layer) 12 | # to speed up the neuron impact scores computation 13 | pretrained_model=$(get_trained_model ${exp} jstl) 14 | python2 ${CAFFE_DIR}/python/gen_bn_inference.py \ 15 | models/jstl/jstl_deploy.prototxt ${pretrained_model} 16 | inference_model=$(get_trained_model_for_inference jstl jstl) 17 | 18 | # Compute neuron impact scores (NIS) for each dataset 19 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 20 | compute_neuron_impact_scores ${dataset} ${inference_model} 21 | done 22 | 23 | # Fine-tune on each dataset 24 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 25 | train_model ${exp} ${dataset} ${pretrained_model} 26 | done 27 | 28 | # Extract features on all datasets 29 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 30 | trained_model=$(get_trained_model ${exp} ${dataset}) 31 | extract_features ${exp} ${dataset} ${trained_model} 32 | done 33 | 34 | # Evaluate performance 35 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 36 | trained_model=$(get_trained_model ${exp} ${dataset}) 37 | result_dir=$(get_result_dir ${exp} ${dataset} ${trained_model}) 38 | echo ${dataset} 39 | python2 eval/metric_learning.py ${result_dir} 40 | echo 41 | done 42 | 43 | -------------------------------------------------------------------------------- /scripts/exp_dgd.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Experiments for joint single task learning (JSTL) with domain guided dropout (DGD) 3 | 4 | # Change to the project root directory. Assume this file is at scripts/. 5 | cd $(dirname ${BASH_SOURCE[0]})/../ 6 | 7 | source scripts/routines.sh 8 | 9 | exp='dgd' 10 | 11 | # Make a model for inference (treat BN as a fixed affine layer) 12 | # to speed up the neuron impact scores computation 13 | trained_model=$(get_trained_model jstl jstl) 14 | python2 ${CAFFE_DIR}/python/gen_bn_inference.py \ 15 | models/jstl/jstl_deploy.prototxt ${trained_model} 16 | inference_model=$(get_trained_model_for_inference jstl jstl) 17 | 18 | # Compute neuron impact scores (NIS) for each dataset 19 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids shinpuhkan; do 20 | compute_neuron_impact_scores ${dataset} ${inference_model} 21 | done 22 | 23 | # Combine the NIS together for JSTL data samples 24 | for subset in train val; do 25 | python2 tools/save_joint_impact_score.py \ 26 | ${NIS_DIR} ${DB_DIR}/jstl/${subset}.txt ${DB_DIR}/jstl/nis_${subset}_lmdb 27 | done 28 | 29 | # Resume the JSTL training with DGD 30 | train_model ${exp} jstl ${trained_model} 31 | trained_model=$(get_trained_model ${exp} jstl) 32 | 33 | # Extract features on all datasets 34 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 35 | extract_features ${exp} ${dataset} ${trained_model} 36 | done 37 | 38 | # Evaluate performance 39 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids; do 40 | result_dir=$(get_result_dir ${exp} ${dataset} ${trained_model}) 41 | echo ${dataset} 42 | python2 eval/metric_learning.py ${result_dir} 43 | echo 44 | done 45 | -------------------------------------------------------------------------------- /utils/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import json 4 | import codecs 5 | import cPickle 6 | 7 | 8 | def mkdir_if_missing(d): 9 | if not osp.isdir(d): 10 | 
os.makedirs(d) 11 | 12 | 13 | def pickle(data, file_path): 14 | with open(file_path, 'wb') as f: 15 | cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL) 16 | 17 | 18 | def unpickle(file_path): 19 | with open(file_path, 'rb') as f: 20 | data = cPickle.load(f) 21 | return data 22 | 23 | 24 | def read_list(file_path, coding=None): 25 | if coding is None: 26 | with open(file_path, 'r') as f: 27 | arr = [line.strip() for line in f.readlines()] 28 | else: 29 | with codecs.open(file_path, 'r', coding) as f: 30 | arr = [line.strip() for line in f.readlines()] 31 | return arr 32 | 33 | 34 | def write_list(arr, file_path, coding=None): 35 | if coding is None: 36 | arr = ['{}'.format(item) for item in arr] 37 | with open(file_path, 'w') as f: 38 | f.write('\n'.join(arr)) 39 | else: 40 | with codecs.open(file_path, 'w', coding) as f: 41 | f.write(u'\n'.join(arr)) 42 | 43 | 44 | def read_kv(file_path, coding=None): 45 | arr = read_list(file_path, coding) 46 | if len(arr) == 0: 47 | return [], [] 48 | return zip(*map(str.split, arr)) 49 | 50 | 51 | def write_kv(k, v, file_path, coding=None): 52 | arr = zip(k, v) 53 | arr = [' '.join(item) for item in arr] 54 | write_list(arr, file_path, coding) 55 | 56 | 57 | def read_json(file_path): 58 | with open(file_path, 'r') as f: 59 | obj = json.load(f) 60 | return obj 61 | 62 | 63 | def write_json(obj, file_path): 64 | with open(file_path, 'w') as f: 65 | json.dump(obj, f, indent=4, separators=(',', ': ')) -------------------------------------------------------------------------------- /data/format_cuhk01.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | 6 | import numpy as np 7 | from scipy.misc import imsave 8 | 9 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 10 | if root not in sys.path: 11 | sys.path.insert(0, root) 12 | from utils import mkdir_if_missing, write_json 13 | 14 | 15 | def main(args): 16 | output_dir = args.output_dir 17 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 18 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 19 | num_identities = 971 20 | identities = [0] * num_identities 21 | for i in xrange(num_identities): 22 | p_images = [[], []] 23 | for j in xrange(4): 24 | cam_id = j // 2 25 | src_file = '{:04d}{:03d}.png'.format(i + 1, j + 1) 26 | tgt_file = 'cam_{}/{:05d}_{:05d}.png'.format(cam_id, i, j % 2) 27 | shutil.copy(osp.join(args.cuhk01_dir, src_file), 28 | osp.join(args.output_dir, tgt_file)) 29 | p_images[cam_id].append(tgt_file) 30 | identities[i] = p_images 31 | # Save meta information into a json file 32 | meta = {'name': 'cuhk01', 'shot': 'multiple', 'num_cameras': 2} 33 | meta['identities'] = identities 34 | write_json(meta, osp.join(output_dir, 'meta.json')) 35 | # Randomly create a training and test split 36 | num = len(identities) 37 | pids = np.random.permutation(num) 38 | trainval_pids = sorted(pids[:num // 2]) 39 | test_pids = sorted(pids[num // 2:]) 40 | split = {'trainval': trainval_pids, 41 | 'test_probe': test_pids, 42 | 'test_gallery': test_pids} 43 | write_json(split, osp.join(output_dir, 'split.json')) 44 | 45 | 46 | if __name__ == '__main__': 47 | parser = ArgumentParser( 48 | description="Convert the CUHK-01 dataset into the uniform format") 49 | parser.add_argument('cuhk01_dir', 50 | help="Root directory of the CUHK-01 dataset containing image files") 51 | parser.add_argument('output_dir', 52 | help="Output directory for the formatted CUHK-01 
dataset") 53 | args = parser.parse_args() 54 | main(args) -------------------------------------------------------------------------------- /tools/merge_lists_single_task.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | from argparse import ArgumentParser 4 | 5 | import numpy as np 6 | 7 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 8 | if root not in sys.path: 9 | sys.path.insert(0, root) 10 | from utils import mkdir_if_missing, write_list, read_kv, write_kv 11 | 12 | 13 | def main(args): 14 | id_offset = 0 15 | merged_train_kv = {} 16 | merged_val_kv = {} 17 | for dataset_dir, db_dir in zip(args.dataset_dirs, args.db_dirs): 18 | train_files, train_labels = read_kv(osp.join(db_dir, 'train.txt')) 19 | val_files, val_labels = read_kv(osp.join(db_dir, 'val.txt')) 20 | unique_ids = set(map(int, train_labels + val_labels)) 21 | id_mapping = {idx: i + id_offset for i, idx in enumerate(unique_ids)} 22 | for k, v in zip(train_files, train_labels): 23 | merged_train_kv[osp.join(dataset_dir, k)] = id_mapping[int(v)] 24 | for k, v in zip(val_files, val_labels): 25 | merged_val_kv[osp.join(dataset_dir, k)] = id_mapping[int(v)] 26 | id_offset += len(id_mapping) 27 | mkdir_if_missing(osp.join(args.output_dir)) 28 | train_list = [k + ' ' + str(v) for k, v in merged_train_kv.iteritems()] 29 | np.random.shuffle(train_list) 30 | write_list(train_list, osp.join(args.output_dir, 'train.txt')) 31 | write_kv(merged_val_kv.keys(), map(str, merged_val_kv.values()), 32 | osp.join(args.output_dir, 'val.txt')) 33 | print "Max ID:", id_offset 34 | 35 | 36 | if __name__ == '__main__': 37 | parser = ArgumentParser( 38 | description="Merge multiple lists of train / val image file and " 39 | "label into a single-task one") 40 | parser.add_argument('--dataset-dirs', type=str, nargs='+', 41 | help="Dataset directories containing cam_0/, cam_1/, ...") 42 | parser.add_argument('--db-dirs', type=str, nargs='+', 43 | help="Database directories containing train.txt and val.txt. 
" 44 | "Must have the same number of dirs with dataset_dirs") 45 | parser.add_argument('output_dir', help="Output directories for the lists") 46 | args = parser.parse_args() 47 | assert len(args.dataset_dirs) == len(args.db_dirs) 48 | main(args) -------------------------------------------------------------------------------- /data/format_shinpuhkan.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from glob import glob 6 | 7 | import numpy as np 8 | from scipy.misc import imsave 9 | 10 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 11 | if root not in sys.path: 12 | sys.path.insert(0, root) 13 | from utils import mkdir_if_missing, write_json 14 | 15 | 16 | def main(args): 17 | # cam_0 to cam_15 18 | for i in xrange(16): 19 | mkdir_if_missing(osp.join(args.output_dir, 'cam_' + str(i))) 20 | images = glob(osp.join(args.shinpuhkan_dir, 'images', '*.jpg')) 21 | images.sort() 22 | identities = [] 23 | prev_pid = -1 24 | for name in images: 25 | name = osp.basename(name) 26 | p_id = int(name[0:3]) - 1 27 | c_id = int(name[4:6]) - 1 28 | if prev_pid != p_id: 29 | identities.append([]) 30 | prev_cid = -1 31 | p_images = identities[-1] 32 | if prev_cid != c_id: 33 | p_images.append([]) 34 | v_images = p_images[-1] 35 | file_name = 'cam_{}/{:05d}_{:05d}.jpg'.format(c_id, p_id, len(v_images)) 36 | shutil.copy(osp.join(args.shinpuhkan_dir, 'images', name), 37 | osp.join(args.output_dir, file_name)) 38 | v_images.append(file_name) 39 | prev_pid = p_id 40 | prev_cid = c_id 41 | # Save meta information into a json file 42 | meta = {'name': 'Shinpuhkan', 'shot': 'multiple', 'num_cameras': 16} 43 | meta['identities'] = identities 44 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 45 | # We don't test on this dataset. Just use all the data for train / val. 
46 | split = {'trainval': range(len(identities)), 47 | 'test_probe': [], 48 | 'test_gallery': []} 49 | write_json(split, osp.join(args.output_dir, 'split.json')) 50 | 51 | 52 | if __name__ == '__main__': 53 | parser = ArgumentParser( 54 | description="Convert the Shinpuhkan dataset into the uniform format") 55 | parser.add_argument('shinpuhkan_dir', 56 | help="Root directory of the Shinpuhkan dataset containing images/") 57 | parser.add_argument('output_dir', 58 | help="Output directory for the formatted Shinpuhkan dataset") 59 | args = parser.parse_args() 60 | main(args) -------------------------------------------------------------------------------- /data/format_viper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from glob import glob 6 | 7 | import numpy as np 8 | from scipy.misc import imsave 9 | 10 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 11 | if root not in sys.path: 12 | sys.path.insert(0, root) 13 | from utils import mkdir_if_missing, write_json 14 | 15 | 16 | def main(args): 17 | output_dir = args.output_dir 18 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 19 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 20 | identities = [] 21 | cam1_images = glob(osp.join(args.viper_dir, 'cam_a', '*.bmp')) 22 | cam2_images = glob(osp.join(args.viper_dir, 'cam_b', '*.bmp')) 23 | cam1_images.sort() 24 | cam2_images.sort() 25 | assert len(cam1_images) == len(cam2_images) 26 | for i in xrange(len(cam1_images)): 27 | p_id = len(identities) 28 | p_images = [] 29 | # view-0 30 | file_name = 'cam_0/{:05d}_{:05d}.bmp'.format(p_id, 0) 31 | shutil.copy(cam1_images[i], 32 | osp.join(args.output_dir, file_name)) 33 | p_images.append([file_name]) 34 | # view-1 35 | file_name = 'cam_1/{:05d}_{:05d}.bmp'.format(p_id, 0) 36 | shutil.copy(cam2_images[i], 37 | osp.join(args.output_dir, file_name)) 38 | p_images.append([file_name]) 39 | identities.append(p_images) 40 | # Save meta information into a json file 41 | meta = {'name': 'VIPeR', 'shot': 'single', 'num_cameras': 2} 42 | meta['identities'] = identities 43 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 44 | # Randomly create a training and test split 45 | num = len(identities) 46 | pids = np.random.permutation(num) 47 | trainval_pids = sorted(pids[:num // 2]) 48 | test_pids = sorted(pids[num // 2:]) 49 | split = {'trainval': trainval_pids, 50 | 'test_probe': test_pids, 51 | 'test_gallery': test_pids} 52 | write_json(split, osp.join(output_dir, 'split.json')) 53 | 54 | 55 | if __name__ == '__main__': 56 | parser = ArgumentParser( 57 | description="Convert the VIPeR dataset into the uniform format") 58 | parser.add_argument('viper_dir', 59 | help="Root directory of the VIPeR dataset containing " 60 | "cam_a/ and cam_b/") 61 | parser.add_argument('output_dir', 62 | help="Output directory for the formatted VIPeR dataset") 63 | args = parser.parse_args() 64 | main(args) -------------------------------------------------------------------------------- /tools/save_joint_impact_score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from glob import glob 6 | 7 | import lmdb 8 | import numpy as np 9 | 10 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 11 | if root not in sys.path: 12 | sys.path.insert(0, root) 13 | from utils import 
read_list 14 | 15 | if osp.join(root, 'external/caffe/python') not in sys.path: 16 | sys.path.insert(0, osp.join(root, 'external/caffe/python')) 17 | import caffe 18 | from caffe.proto.caffe_pb2 import Datum 19 | 20 | 21 | def load_domain_impact(impact_dir): 22 | files = glob(osp.join(impact_dir, '*.npy')) 23 | domain_datum = {} 24 | for file_name in files: 25 | domain_name = osp.splitext(osp.basename(file_name))[0] 26 | impact = np.load(file_name) 27 | assert impact.ndim == 1, "The impact score should be a vector." 28 | datum = Datum() 29 | datum.channels = len(impact) 30 | datum.height = 1 31 | datum.width = 1 32 | del datum.float_data[:] 33 | datum.float_data.extend(list(impact)) 34 | domain_datum[domain_name] = datum.SerializeToString() 35 | return domain_datum 36 | 37 | 38 | def main(args): 39 | domain_datum = load_domain_impact(args.impact_dir) 40 | file_paths = read_list(args.image_list_file) 41 | if osp.isdir(args.output_lmdb): shutil.rmtree(args.output_lmdb) 42 | with lmdb.open(args.output_lmdb, map_size=1099511627776) as db: 43 | with db.begin(write=True) as txn: 44 | for i, file_path in enumerate(file_paths): 45 | find_match = False 46 | for domain, datum in domain_datum.iteritems(): 47 | if domain not in file_path: continue 48 | txn.put('{:010d}_{}'.format(i, domain), datum) 49 | find_match = True 50 | break 51 | if not find_match: 52 | print "Warning: cannot find the domain of {}".format( 53 | file_path) 54 | 55 | 56 | if __name__ == '__main__': 57 | parser = ArgumentParser( 58 | description="Save neuron impact scores for a joint dataset. Each sample " 59 | "has a datum.") 60 | parser.add_argument('impact_dir', 61 | help="A directory of numpy files which are named after each domain") 62 | parser.add_argument('image_list_file', 63 | help="A txt file of a list of images. KV format is also fine.") 64 | parser.add_argument('output_lmdb') 65 | args = parser.parse_args() 66 | main(args) -------------------------------------------------------------------------------- /tools/compute_impact_score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | from argparse import ArgumentParser 5 | 6 | import numpy as np 7 | from google.protobuf import text_format 8 | 9 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 10 | if osp.join(root, 'external/caffe/python') not in sys.path: 11 | sys.path.insert(0, osp.join(root, 'external/caffe/python')) 12 | import caffe 13 | from caffe.proto.caffe_pb2 import NetParameter 14 | 15 | 16 | def parse_prototxt(model_file, layer_name): 17 | with open(model_file) as fp: 18 | net = NetParameter() 19 | text_format.Parse(fp.read(), net) 20 | for i, layer in enumerate(net.layer): 21 | if layer.name != layer_name: continue 22 | blob = layer.top[0] 23 | for j in xrange(i + 1, len(net.layer)): 24 | if blob in net.layer[j].bottom: 25 | next_layer = net.layer[j].name 26 | return blob, next_layer 27 | raise ValueError( 28 | "Cannot find layer {} or its next layer".format(layer_name)) 29 | 30 | 31 | def main(args): 32 | caffe.set_device(0) 33 | caffe.set_mode_gpu() 34 | blob, next_layer = parse_prototxt(args.model, args.layer) 35 | net = caffe.Net(args.model, args.weights, caffe.TEST) 36 | # One impact score per channel (channelwise for conv layers) 37 | impact = np.zeros(net.blobs[blob].shape[1]) 38 | for i in xrange(args.num_iters): 39 | net.forward() 40 | f = net.blobs[blob].data.copy() 41 | loss = net.blobs['loss'].data.copy() 42 | for n in xrange(f.shape[1]): 43 | net.blobs[blob].data[...] 
= f.copy() 44 | net.blobs[blob].data[:, n] = 0 45 | net.forward(start=next_layer) 46 | delta = net.blobs['loss'].data - loss 47 | impact[n] += delta.sum() 48 | # Normalize 49 | if args.normalize: 50 | assert impact.max() > 0, "No neuron has positive impact" 51 | scale = np.log(9) / impact.max() 52 | impact *= scale 53 | else: 54 | batch_size = net.blobs[blob].shape[0] 55 | impact /= (batch_size * args.num_iters) 56 | # Save 57 | np.save(args.output, impact) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser( 62 | description="Compute neuron impact scores on a particular domain") 63 | parser.add_argument('model') 64 | parser.add_argument('weights') 65 | parser.add_argument('output') 66 | parser.add_argument('--num_iters', type=int, required=True) 67 | parser.add_argument('--layer', type=str, default='fc7') 68 | parser.add_argument('--normalize', action='store_true', 69 | help="Normalize to make sigmoid(highest impact) == 0.9") 70 | args = parser.parse_args() 71 | main(args) -------------------------------------------------------------------------------- /data/format_ilids.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from collections import defaultdict 6 | from glob import glob 7 | 8 | import numpy as np 9 | from scipy.misc import imsave 10 | 11 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 12 | if root not in sys.path: 13 | sys.path.insert(0, root) 14 | from utils import mkdir_if_missing, write_json 15 | 16 | 17 | def main(args): 18 | output_dir = args.output_dir 19 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 20 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 21 | # Collect the images of each person into a dict 22 | images = glob(osp.join(args.ilids_dir, 'Persons', '*.jpg')) 23 | pdict = defaultdict(list) 24 | for imname in images: 25 | pid = int(osp.basename(imname)[:4]) 26 | pdict[pid].append(imname) 27 | # Randomly choose half of the images as cam_0, others as cam_1 28 | identities = [] 29 | for i, (pid, images) in enumerate(pdict.iteritems()): 30 | num = len(images) 31 | np.random.shuffle(images) 32 | p_images = [[], []] 33 | for src_file in images[:(num // 2)]: 34 | tgt_file = 'cam_0/{:05d}_{:05d}.jpg'.format(i, len(p_images[0])) 35 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 36 | p_images[0].append(tgt_file) 37 | for src_file in images[(num // 2):]: 38 | tgt_file = 'cam_1/{:05d}_{:05d}.jpg'.format(i, len(p_images[1])) 39 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 40 | p_images[1].append(tgt_file) 41 | identities.append(p_images) 42 | # Save meta information into a json file 43 | meta = {'name': 'i-LIDS', 'shot': 'multiple', 'num_cameras': 2} 44 | meta['identities'] = identities 45 | write_json(meta, osp.join(output_dir, 'meta.json')) 46 | # Randomly create a training and test split 47 | num = len(identities) 48 | pids = np.random.permutation(num) 49 | trainval_pids = sorted(pids[:num // 2]) 50 | test_pids = sorted(pids[num // 2:]) 51 | split = {'trainval': trainval_pids, 52 | 'test_probe': test_pids, 53 | 'test_gallery': test_pids} 54 | write_json(split, osp.join(output_dir, 'split.json')) 55 | 56 | 57 | if __name__ == '__main__': 58 | parser = ArgumentParser( 59 | description="Convert the i-LIDS dataset into the uniform format") 60 | parser.add_argument('ilids_dir', 61 | help="Root directory of the i-LIDS dataset containing Persons/") 62 | 
parser.add_argument('output_dir', 63 | help="Output directory for the formatted i-LIDS dataset") 64 | args = parser.parse_args() 65 | main(args) -------------------------------------------------------------------------------- /data/format_3dpes.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from collections import defaultdict 6 | from glob import glob 7 | 8 | import numpy as np 9 | from scipy.misc import imsave 10 | 11 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 12 | if root not in sys.path: 13 | sys.path.insert(0, root) 14 | from utils import mkdir_if_missing, write_json 15 | 16 | 17 | def main(args): 18 | output_dir = args.output_dir 19 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 20 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 21 | # Collect the person_id and view_id into dict 22 | images = glob(osp.join(args.input_dir, 'RGB', '*.bmp')) 23 | pdict = defaultdict(lambda: defaultdict(list)) 24 | for imname in images: 25 | pid, vid = osp.basename(imname).split('_')[0:2] 26 | pdict[pid][vid].append(imname) 27 | # Randomly choose half of the views as cam_0, others as cam_1 28 | identities = [] 29 | for i, pid in enumerate(pdict): 30 | vids = pdict[pid].keys() 31 | num_views = len(vids) 32 | np.random.shuffle(vids) 33 | p_images = [[], []] 34 | for vid in vids[:(num_views // 2)]: 35 | for src_file in pdict[pid][vid]: 36 | tgt_file = 'cam_0/{:05d}_{:05d}.bmp'.format(i, len(p_images[0])) 37 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 38 | p_images[0].append(tgt_file) 39 | for vid in vids[(num_views // 2):]: 40 | for src_file in pdict[pid][vid]: 41 | tgt_file = 'cam_1/{:05d}_{:05d}.bmp'.format(i, len(p_images[1])) 42 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 43 | p_images[1].append(tgt_file) 44 | identities.append(p_images) 45 | # Save meta information into a json file 46 | meta = {'name': '3DPeS', 'shot': 'multiple', 'num_cameras': 2} 47 | meta['identities'] = identities 48 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 49 | # Randomly create a training and test split 50 | num = len(identities) 51 | pids = np.random.permutation(num) 52 | trainval_pids = sorted(pids[100:]) 53 | test_pids = sorted(pids[:100]) 54 | split = {'trainval': trainval_pids, 55 | 'test_probe': test_pids, 56 | 'test_gallery': test_pids} 57 | write_json(split, osp.join(output_dir, 'split.json')) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser( 62 | description="Convert the 3DPeS dataset into the uniform format") 63 | parser.add_argument('input_dir', 64 | help="Root directory of the 3DPeS dataset containing " 65 | "RGB/") 66 | parser.add_argument('output_dir', 67 | help="Output directory for the formatted 3DPeS dataset") 68 | args = parser.parse_args() 69 | main(args) -------------------------------------------------------------------------------- /utils/cmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def _cmc_core(D, G, P): 5 | m, n = D.shape 6 | order = np.argsort(D, axis=0) 7 | match = (G[order] == P) 8 | return (match.sum(axis=1) * 1.0 / n).cumsum() 9 | 10 | 11 | def cmc(distmat, glabels=None, plabels=None, ds=None, repeat=None): 12 | """Compute the Cumulative Match Characteristic (CMC) 13 | This function assumes that gallery labels have no duplication. 
If there are 14 | duplications, random downsampling will be performed on gallery labels, and 15 | the computation will be repeated to get an average result. 16 | Parameters 17 | ---------- 18 | distmat : numpy.ndarray 19 | The distance matrix. ``distmat[i, j]`` is the distance between i-th 20 | gallery sample and j-th probe sample. 21 | glabels : numpy.ndarray or None, optional 22 | plabels : numpy.ndarray or None, optional 23 | If None, then gallery and probe labels are assumed to have no 24 | duplications. Otherwise, they represent the vector of gallery and probe 25 | labels. Default is None. 26 | ds : int or None, optional 27 | If None, then no downsampling on gallery labels will be performed. 28 | Otherwise, it represents the number of gallery labels to be randomly 29 | selected. Default is None. 30 | repeat : int or None, optional 31 | If None, then the function will repeat the computation for 100 times 32 | when downsampling is performed. Otherwise, it specifies the number of 33 | repetition. Default is None. 34 | Returns 35 | ------- 36 | out : numpy.ndarray 37 | The rank-1 to rank-m accuracy, where m is the number of (downsampled) 38 | gallery labels. 39 | """ 40 | m, n = distmat.shape 41 | if glabels is None and plabels is None: 42 | glabels = np.arange(0, m) 43 | plabels = np.arange(0, n) 44 | if isinstance(glabels, list): 45 | glabels = np.asarray(glabels) 46 | if isinstance(plabels, list): 47 | plabels = np.asarray(plabels) 48 | ug = np.unique(glabels) 49 | if ds is None: 50 | ds = ug.size 51 | if repeat is None: 52 | if ds == ug.size and ug.size == len(glabels): 53 | repeat = 1 54 | else: 55 | repeat = 100 56 | 57 | ret = 0 58 | for __ in xrange(repeat): 59 | # Randomly select gallery labels 60 | G = np.random.choice(ug, ds, replace=False) 61 | # Select corresponding probe samples 62 | p_inds = [i for i in xrange(len(plabels)) if plabels[i] in G] 63 | P = plabels[p_inds] 64 | # Randomly select one gallery sample per label selected 65 | D = np.zeros((ds, P.size)) 66 | for i, g in enumerate(G): 67 | samples = np.where(glabels == g)[0] 68 | j = np.random.choice(samples) 69 | D[i, :] = distmat[j, p_inds] 70 | # Compute CMC 71 | ret += _cmc_core(D, G, P) 72 | return ret / repeat 73 | -------------------------------------------------------------------------------- /tools/make_lists_id_training.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | from argparse import ArgumentParser 4 | 5 | import numpy as np 6 | 7 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 8 | if root not in sys.path: 9 | sys.path.insert(0, root) 10 | from utils import mkdir_if_missing, write_list, read_json 11 | 12 | 13 | def _get_list(identities): 14 | ret = [] 15 | for views in identities: 16 | for v in views: 17 | for file in v: 18 | label = int(osp.basename(file)[:5]) 19 | ret.append((file, label)) 20 | return np.asarray(ret) 21 | 22 | 23 | def _save(file_label_list, file_path): 24 | content = ['{} {}'.format(x, y) for x, y in file_label_list] 25 | write_list(content, file_path) 26 | 27 | 28 | def main(args): 29 | meta = read_json(osp.join(args.dataset_dir, 'meta.json')) 30 | split = read_json(osp.join(args.dataset_dir, 'split.json')) 31 | identities = np.asarray(meta['identities']) 32 | # Make train / val. Consider single or multiple shot. 
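# Worked example with illustrative numbers: given 100 trainval identities and
# --val-ratio 0.2, the single-shot branch reserves int(100 * 0.2) * 2 = 40
# identities, splitting each one's views evenly between train and val; the
# other 60 identities go entirely to train.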
33 | trainval = identities[split['trainval']] 34 | if meta['shot'] == 'single': 35 | # When single shot, to ensure each identity has at least one training 36 | # image, we first randomly choose validation identities, then randomly 37 | # split their views equally for training and validation. 38 | num_val = int(len(trainval) * args.val_ratio) * 2 39 | np.random.shuffle(trainval) 40 | train = list(trainval[num_val:]) 41 | val = [] 42 | for views in trainval[:num_val]: 43 | np.random.shuffle(views) 44 | train.append(views[:len(views) // 2]) 45 | val.append(views[len(views) // 2:]) 46 | train = _get_list(train) 47 | val = _get_list(val) 48 | else: 49 | # When multiple shots are available, we just randomly split the trainval images 50 | trainval = _get_list(trainval) 51 | np.random.shuffle(trainval) 52 | num_val = int(len(trainval) * args.val_ratio) 53 | train = trainval[num_val:] 54 | val = trainval[:num_val] 55 | # Make test probe / gallery. Probe identities should be a subset of 56 | # gallery's. First half views are probe, others are gallery. 57 | assert len(set(split['test_probe']) - set(split['test_gallery'])) == 0 58 | test_probe, test_gallery = [], [] 59 | for views in identities[split['test_probe']]: 60 | test_probe.append(views[:len(views) // 2]) 61 | test_gallery.append(views[len(views) // 2:]) 62 | only_in_gallery = list( 63 | set(split['test_gallery']) - set(split['test_probe'])) 64 | test_gallery.extend(identities[only_in_gallery]) 65 | test_probe = _get_list(test_probe) 66 | test_gallery = _get_list(test_gallery) 67 | # Save to files 68 | mkdir_if_missing(args.output_dir) 69 | _save(train, osp.join(args.output_dir, 'train.txt')) 70 | _save(val, osp.join(args.output_dir, 'val.txt')) 71 | _save(test_probe, osp.join(args.output_dir, 'test_probe.txt')) 72 | _save(test_gallery, osp.join(args.output_dir, 'test_gallery.txt')) 73 | 74 | 75 | if __name__ == '__main__': 76 | parser = ArgumentParser( 77 | description="Create lists of image files and labels for making LMDBs") 78 | parser.add_argument( 79 | 'dataset_dir', 80 | help="Directory of a formatted dataset") 81 | parser.add_argument( 82 | 'output_dir', 83 | help="Output directory for the lists") 84 | parser.add_argument( 85 | '--val-ratio', 86 | type=float, 87 | default=0.2, 88 | help="Ratio of validation data to all trainval data. Default 0.2.") 89 | args = parser.parse_args() 90 | main(args) -------------------------------------------------------------------------------- /data/format_cuhk03.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | from argparse import ArgumentParser 4 | 5 | import numpy as np 6 | from scipy.misc import imsave 7 | 8 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 9 | if root not in sys.path: 10 | sys.path.insert(0, root) 11 | from utils import mkdir_if_missing, write_json 12 | 13 | 14 | def _load(cuhk03_dir): 15 | try: 16 | from scipy.io import loadmat 17 | matdata = loadmat(osp.join(cuhk03_dir, 'cuhk-03.mat')) 18 | except (ImportError, NotImplementedError):  # scipy cannot read MATLAB v7.3 files 19 | from hdf5storage import loadmat 20 | matdata = loadmat(osp.join(cuhk03_dir, 'cuhk-03.mat')) 21 | return matdata 22 | 23 | 24 | def main(args): 25 | matdata = _load(args.cuhk03_dir) 26 | output_dir = args.output_dir 27 | # Although there are 5 pairs of camera views, we tile them up as one pair. 
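# In cuhk-03.mat every identity has ten image slots: columns 0-4 come from
# the first camera of its view pair and columns 5-9 from the second, so
# they are mapped to cam_0 and cam_1 respectively below.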
28 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 29 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 30 | identities = [] 31 | for imgs_labeled, imgs_detected in zip( 32 | matdata['labeled'].squeeze(), matdata['detected'].squeeze()): 33 | # We merge the manually labeled and automatically detected images of 34 | # the same view. 35 | for i in xrange(imgs_labeled.shape[0]): 36 | pid = len(identities) 37 | p_images = [] 38 | # view-0 39 | v_images = [] 40 | for j in xrange(5): 41 | if imgs_labeled[i, j].size == 0: 42 | break 43 | file_name = 'cam_0/{:05d}_{:05d}.jpg'.format(pid, len(v_images)) 44 | imsave(osp.join(output_dir, file_name), imgs_labeled[i, j]) 45 | v_images.append(file_name) 46 | for j in xrange(5): 47 | if imgs_detected[i, j].size == 0: 48 | break 49 | file_name = 'cam_0/{:05d}_{:05d}.jpg'.format(pid, len(v_images)) 50 | imsave(osp.join(output_dir, file_name), imgs_detected[i, j]) 51 | v_images.append(file_name) 52 | p_images.append(v_images) 53 | # view-1 54 | v_images = [] 55 | for j in xrange(5, 10): 56 | if imgs_labeled[i, j].size == 0: 57 | break 58 | file_name = 'cam_1/{:05d}_{:05d}.jpg'.format(pid, len(v_images)) 59 | imsave(osp.join(output_dir, file_name), imgs_labeled[i, j]) 60 | v_images.append(file_name) 61 | for j in xrange(5, 10): 62 | if imgs_detected[i, j].size == 0: 63 | break 64 | file_name = 'cam_1/{:05d}_{:05d}.jpg'.format(pid, len(v_images)) 65 | imsave(osp.join(output_dir, file_name), imgs_detected[i, j]) 66 | v_images.append(file_name) 67 | p_images.append(v_images) 68 | identities.append(p_images) 69 | # Save meta information into a json file 70 | meta = {'name': 'cuhk03', 'shot': 'multiple', 'num_cameras': 2} 71 | meta['identities'] = identities 72 | write_json(meta, osp.join(output_dir, 'meta.json')) 73 | # Save training and test splits into a json file 74 | view_counts = [a.shape[0] for a in matdata['labeled'].squeeze()] 75 | vid_offsets = np.r_[0, np.cumsum(view_counts)] 76 | test_info = np.random.choice(matdata['testsets'].squeeze()) 77 | test_pids = [] 78 | for i, j in test_info: 79 | pid = vid_offsets[i - 1] + j - 1 80 | test_pids.append(pid) 81 | test_pids.sort() 82 | trainval_pids = list(set(xrange(vid_offsets[-1])) - set(test_pids)) 83 | split = {'trainval': trainval_pids, 84 | 'test_probe': test_pids, 85 | 'test_gallery': test_pids} 86 | write_json(split, osp.join(output_dir, 'split.json')) 87 | 88 | 89 | if __name__ == '__main__': 90 | parser = ArgumentParser( 91 | description="Convert the CUHK-03 dataset into the uniform format") 92 | parser.add_argument( 93 | 'cuhk03_dir', 94 | help="Root directory of the CUHK-03 dataset containing cuhk-03.mat") 95 | parser.add_argument( 96 | 'output_dir', 97 | help="Output directory for the formatted CUHK-03 dataset") 98 | args = parser.parse_args() 99 | main(args) -------------------------------------------------------------------------------- /eval/metric_learning.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | from argparse import ArgumentParser 4 | 5 | import numpy as np 6 | from sklearn.decomposition import PCA 7 | from sklearn.metrics.pairwise import pairwise_distances 8 | 9 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 10 | if root not in sys.path: 11 | sys.path.insert(0, root) 12 | from utils import cmc 13 | 14 | 15 | def _get_train_data(result_dir): 16 | # Merge training and validation features and labels 17 | features = np.r_[np.load(osp.join(result_dir, 'train_features.npy')), 18 | 
np.load(osp.join(result_dir, 'val_features.npy'))] 19 | labels = np.r_[np.load(osp.join(result_dir, 'train_labels.npy')), 20 | np.load(osp.join(result_dir, 'val_labels.npy'))] 21 | # Reassign the labels to make them sequentially numbered from zero 22 | unique_labels = np.unique(labels) 23 | labels_map = {l: i for i, l in enumerate(unique_labels)} 24 | labels = np.asarray([labels_map[l] for l in labels]) 25 | return features, labels 26 | 27 | 28 | def _get_test_data(result_dir): 29 | PX = np.load(osp.join(result_dir, 'test_probe_features.npy')) 30 | PY = np.load(osp.join(result_dir, 'test_probe_labels.npy')) 31 | GX = np.load(osp.join(result_dir, 'test_gallery_features.npy')) 32 | GY = np.load(osp.join(result_dir, 'test_gallery_labels.npy')) 33 | # Reassign the labels to make them sequentially numbered from zero 34 | unique_labels = np.unique(np.r_[PY, GY]) 35 | labels_map = {l: i for i, l in enumerate(unique_labels)} 36 | PY = np.asarray([labels_map[l] for l in PY]) 37 | GY = np.asarray([labels_map[l] for l in GY]) 38 | return PX, PY, GX, GY 39 | 40 | 41 | def _learn_pca(X, dim): 42 | pca = PCA(n_components=dim) 43 | pca.fit(X) 44 | return pca 45 | 46 | 47 | def _learn_metric(X, Y, method): 48 | if method == 'euclidean': 49 | M = np.eye(X.shape[1]) 50 | elif method == 'kissme': 51 | num = len(Y) 52 | X1, X2 = np.meshgrid(np.arange(0, num), np.arange(0, num)) 53 | X1, X2 = X1[X1 < X2], X2[X1 < X2]  # all sample pairs (i, j) with i < j 54 | matches = (Y[X1] == Y[X2]) 55 | num_matches = matches.sum() 56 | num_non_matches = len(matches) - num_matches 57 | idxa = X1[matches] 58 | idxb = X2[matches] 59 | S = X[idxa] - X[idxb] 60 | C1 = S.transpose().dot(S) / num_matches  # covariance of matched-pair differences 61 | p = np.random.choice(num_non_matches, num_matches, replace=False) 62 | idxa = X1[~matches] 63 | idxb = X2[~matches] 64 | idxa = idxa[p] 65 | idxb = idxb[p] 66 | S = X[idxa] - X[idxb] 67 | C0 = S.transpose().dot(S) / num_matches  # covariance of sampled non-matched pairs 68 | M = np.linalg.inv(C1) - np.linalg.inv(C0)  # KISSME metric 69 | return M 70 | 71 | 72 | def _eval_cmc(PX, PY, GX, GY, M): 73 | D = pairwise_distances(GX, PX, metric='mahalanobis', VI=M, n_jobs=-2) 74 | C = cmc(D, GY, PY) 75 | return C 76 | 77 | 78 | def main(args): 79 | X, Y = _get_train_data(args.result_dir) 80 | PX, PY, GX, GY = _get_test_data(args.result_dir) 81 | 82 | file_suffix = args.method 83 | if args.pca is not None: 84 | S = _learn_pca(X, args.pca) 85 | X = S.transform(X) 86 | PX = S.transform(PX) 87 | GX = S.transform(GX) 88 | file_suffix += '_pca_{}'.format(args.pca) 89 | np.save(osp.join(args.result_dir, 'pca_' + file_suffix), S) 90 | 91 | M = _learn_metric(X, Y, args.method) 92 | C = _eval_cmc(PX, PY, GX, GY, M) 93 | np.save(osp.join(args.result_dir, 'metric_' + file_suffix), M) 94 | np.save(osp.join(args.result_dir, 'cmc_' + file_suffix), C) 95 | 96 | for topk in [1, 5, 10, 20]: 97 | print "{:8}{:8.1%}".format('top-' + str(topk), C[topk - 1]) 98 | 99 | if __name__ == '__main__': 100 | parser = ArgumentParser( 101 | description="Learn a metric and evaluate performance") 102 | parser.add_argument('result_dir', 103 | help="Result directory containing extracted features and labels. 
" 104 | "CMC curve will also be saved to this directory.") 105 | parser.add_argument('--method', choices=['euclidean', 'kissme'], 106 | default='euclidean') 107 | parser.add_argument('--pca', type=int, 108 | help="If specified, will reduce features to this dimension by PCA") 109 | args = parser.parse_args() 110 | main(args) -------------------------------------------------------------------------------- /data/format_prid.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | import shutil 4 | from argparse import ArgumentParser 5 | from glob import glob 6 | 7 | import numpy as np 8 | from scipy.misc import imsave 9 | 10 | root = osp.join(osp.dirname(osp.abspath(__file__)), '..') 11 | if root not in sys.path: 12 | sys.path.insert(0, root) 13 | from utils import mkdir_if_missing, write_json 14 | 15 | 16 | def main(args): 17 | output_dir = args.output_dir 18 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 19 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 20 | # Randomly choose 100 people from the 200 shared people as test probe 21 | p = list(np.random.permutation(200)) 22 | test_probe = range(100) 23 | test_gallery = range(100) 24 | identities = [] 25 | for pid in p[:100]: 26 | p_images = [] 27 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_a', 28 | 'person_{:04d}.png'.format(pid + 1)) 29 | tgt_file = osp.join('cam_0', '{:05d}_00000.png'.format(len(identities))) 30 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 31 | p_images.append([tgt_file]) 32 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_b', 33 | 'person_{:04d}.png'.format(pid + 1)) 34 | tgt_file = osp.join('cam_1', '{:05d}_00000.png'.format(len(identities))) 35 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 36 | p_images.append([tgt_file]) 37 | identities.append(p_images) 38 | # Other 100 people from the 200 as a part of trainval 39 | # Choose 10 images randomly from the multi-shot images 40 | trainval = range(100, 200) 41 | for pid in p[100:]: 42 | p_images = [[], []] 43 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_a', 44 | 'person_{:04d}'.format(pid + 1), '*.png')) 45 | images = np.random.choice(images, size=min(10, len(images)), 46 | replace=False) 47 | for src_file in images: 48 | tgt_file = osp.join('cam_0', 49 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[0]))) 50 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 51 | p_images[0].append(tgt_file) 52 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_b', 53 | 'person_{:04d}'.format(pid + 1), '*.png')) 54 | images = np.random.choice(images, size=min(10, len(images)), 55 | replace=False) 56 | for src_file in images: 57 | tgt_file = osp.join('cam_1', 58 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[1]))) 59 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 60 | p_images[1].append(tgt_file) 61 | identities.append(p_images) 62 | # 201 to 385 cam_a people as another part of trainval 63 | for pid in xrange(200, 385): 64 | p_images = [[], []] 65 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_a', 66 | 'person_{:04d}'.format(pid + 1), '*.png')) 67 | images = np.random.choice(images, size=min(10, len(images)), 68 | replace=False) 69 | for src_file in images: 70 | tgt_file = osp.join('cam_0', 71 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[0]))) 72 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 73 | p_images[0].append(tgt_file) 74 | 
trainval.append(len(identities)) 75 | identities.append(p_images) 76 | # 201 to 749 cam_b people as additional test gallery 77 | for pid in xrange(200, 749): 78 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_b', 79 | 'person_{:04d}.png'.format(pid + 1)) 80 | tgt_file = osp.join('cam_1', '{:05d}_00000.png'.format(len(identities))) 81 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 82 | p_images = [[], [tgt_file]] 83 | test_gallery.append(len(identities)) 84 | identities.append(p_images) 85 | # Save meta information into a json file 86 | meta = {'name': 'PRID', 'shot': 'multiple', 'num_cameras': 2} 87 | meta['identities'] = identities 88 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 89 | # We have only one split 90 | split = {'trainval': trainval, 91 | 'test_probe': test_probe, 92 | 'test_gallery': test_gallery} 93 | write_json(split, osp.join(output_dir, 'split.json')) 94 | 95 | 96 | if __name__ == '__main__': 97 | parser = ArgumentParser( 98 | description="Convert the PRID dataset into the uniform format") 99 | parser.add_argument('prid_dir', 100 | help="Root directory of the PRID dataset containing " 101 | "single_shot/ and multi_shot/") 102 | parser.add_argument('output_dir', 103 | help="Output directory for the formatted PRID dataset") 104 | args = parser.parse_args() 105 | main(args) -------------------------------------------------------------------------------- /scripts/routines.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Collection of routine functions 3 | 4 | # Some constants 5 | CAFFE_DIR=external/caffe 6 | 7 | EXP_DIR=external/exp 8 | DATASETS_DIR=${EXP_DIR}/datasets 9 | DB_DIR=${EXP_DIR}/db 10 | RESULTS_DIR=${EXP_DIR}/results 11 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 12 | NIS_DIR=${EXP_DIR}/nis 13 | 14 | MODELS_DIR=models 15 | LOGS_DIR=logs 16 | 17 | get_trained_model() { 18 | local exp=$1 19 | local dataset=$2 20 | 21 | local solver=${MODELS_DIR}/${exp}/${dataset}_solver.prototxt 22 | local max_iter=$(grep 'max_iter' ${solver} | awk '{print $2}') 23 | local snapshot_prefix=$(grep 'snapshot_prefix' ${solver} | awk -F '"' '{print $2}') 24 | local model=${snapshot_prefix}_iter_${max_iter}.caffemodel 25 | echo ${model} 26 | } 27 | 28 | get_trained_model_for_inference() { 29 | local exp=$1 30 | local dataset=$2 31 | 32 | local solver=${MODELS_DIR}/${exp}/${dataset}_solver.prototxt 33 | local max_iter=$(grep 'max_iter' ${solver} | awk '{print $2}') 34 | local snapshot_prefix=$(grep 'snapshot_prefix' ${solver} | awk -F '"' '{print $2}') 35 | local model=${snapshot_prefix}_iter_${max_iter}_inference.caffemodel 36 | echo ${model} 37 | } 38 | 39 | get_result_dir() { 40 | local exp=$1 41 | local dataset=$2 42 | local trained_model=$3 43 | if [[ $# -eq 4 ]]; then 44 | local blob=$4 45 | else 46 | local blob=fc7_bn 47 | fi 48 | 49 | local weights_name=$(basename ${trained_model}) 50 | local weights_name="${weights_name%%.*}" 51 | local result_dir=${RESULTS_DIR}/${exp}/${dataset}_${weights_name}_${blob} 52 | echo ${result_dir} 53 | } 54 | 55 | train_model() { 56 | local exp=$1 57 | local dataset=$2 58 | local pretrained_model=$3 59 | 60 | local solver=${MODELS_DIR}/${exp}/${dataset}_solver.prototxt 61 | local log=${LOGS_DIR}/${exp}/${dataset}.log 62 | local trained_model=$(get_trained_model ${exp} ${dataset}) 63 | 64 | # Make directories. 
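# (the log directory comes from LOGS_DIR and the snapshot directory from
# the snapshot_prefix in the solver; neither exists on a fresh checkout)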
65 | mkdir -p $(dirname ${log}) 66 | mkdir -p $(dirname ${trained_model}) 67 | 68 | # Training 69 | if [[ $# -eq 2 ]]; then 70 | GLOG_logtostderr=1 mpirun -n 2 ${CAFFE_DIR}/build/tools/caffe train \ 71 | -solver ${solver} -gpu 0,1 2>&1 | tee ${log} 72 | else 73 | GLOG_logtostderr=1 mpirun -n 2 ${CAFFE_DIR}/build/tools/caffe train \ 74 | -solver ${solver} -weights ${pretrained_model} -gpu 0,1 2>&1 | tee ${log} 75 | fi 76 | } 77 | 78 | extract_features() { 79 | local exp=$1 80 | local dataset=$2 81 | local trained_model=$3 82 | if [[ $# -eq 4 ]]; then 83 | local blob=$4 84 | else 85 | local blob=fc7_bn 86 | fi 87 | 88 | local result_dir=$(get_result_dir ${exp} ${dataset} ${trained_model}) 89 | rm -rf ${result_dir} 90 | mkdir -p ${result_dir} 91 | 92 | # Extract train, val, test probe, and test gallery features. 93 | for subset in train val test_probe test_gallery; do 94 | echo "Extracting ${subset} set" 95 | local num_samples=$(wc -l ${DB_DIR}/${dataset}/${subset}.txt | awk '{print $1}') 96 | local num_samples=$((num_samples + 1)) 97 | local num_iters=$(((num_samples + 99) / 100)) 98 | local model=$(mktemp) 99 | sed -e "s/\${dataset}/${dataset}/g; s/\${subset}/${subset}/g" \ 100 | ${MODELS_DIR}/exfeat_template.prototxt > ${model} 101 | ${CAFFE_DIR}/build/tools/extract_features \ 102 | ${trained_model} ${model} ${blob},label \ 103 | ${result_dir}/${subset}_features_lmdb,${result_dir}/${subset}_labels_lmdb \ 104 | ${num_iters} lmdb GPU 0 105 | python2 tools/convert_lmdb_to_numpy.py \ 106 | ${result_dir}/${subset}_features_lmdb ${result_dir}/${subset}_features.npy \ 107 | --truncate ${num_samples} 108 | python2 tools/convert_lmdb_to_numpy.py \ 109 | ${result_dir}/${subset}_labels_lmdb ${result_dir}/${subset}_labels.npy \ 110 | --truncate ${num_samples} 111 | done 112 | } 113 | 114 | test_model() { 115 | local exp=$1 116 | local dataset=$2 117 | local trained_model=$3 118 | 119 | # Extract features 120 | extract_features ${exp} ${dataset} ${trained_model} 121 | 122 | # Evaluate performance 123 | local result_dir=$(get_result_dir ${exp} ${dataset} ${trained_model}) 124 | python2 eval/metric_learning.py ${result_dir} 125 | } 126 | 127 | compute_neuron_impact_scores() { 128 | local dataset=$1 129 | local inference_model=$2 130 | if [[ $# -eq 3 ]]; then 131 | local layer=$3 132 | else 133 | local layer=fc7 # Here fc7 is default because we use the inference model 134 | fi 135 | 136 | # Fine-tune the id-classifier only 137 | train_model fc_only ${dataset} ${inference_model} 138 | local finetuned_model=$(get_trained_model fc_only ${dataset}) 139 | 140 | # Compute NIS 141 | local model=${MODELS_DIR}/fc_only/${dataset}_trainval.prototxt 142 | local num_samples=$(wc -l ${DB_DIR}/${dataset}/val.txt | awk '{print $1}') 143 | local num_samples=$((num_samples + 1)) 144 | local num_iters=$(((num_samples + 19) / 20)) 145 | local output_npy=${NIS_DIR}/${dataset}.npy 146 | mkdir -p $(dirname ${output_npy}) 147 | python2 tools/compute_impact_score.py \ 148 | ${model} ${finetuned_model} ${output_npy} \ 149 | --num_iters ${num_iters} --layer ${layer} --normalize 150 | 151 | # Save NIS to LMDB 152 | local output_lmdb=${DB_DIR}/${dataset}/nis_lmdb 153 | python2 tools/save_individual_impact_score.py \ 154 | ${output_npy} ${output_lmdb} 155 | } 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Domain Guided Dropout for Person Re-id 2 | 3 | This project aims at learning generic person 
re-identification (re-id) deep features from multiple datasets with domain guided dropout. It is mainly based on our CVPR 2016 paper [Learning Deep Feature Representations with Domain Guided Dropout for Person Re-identification](http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Xiao_Learning_Deep_Feature_CVPR_2016_paper.pdf). 4 | 5 | ## Installation 6 | 7 | We have integrated our self-brewed caffe into `external/caffe`, which provides batch normalization and multi-GPU parallel training. Please clone this project with the command: 8 | 9 | git clone --recursive https://github.com/Cysu/dgd_person_reid.git 10 | 11 | Apart from the official installation [prerequisites](http://caffe.berkeleyvision.org/installation.html), we have several other dependencies: cudnn-v4, openmpi, and 1.55 <= boost < 1.60. You may install them manually or via a package manager (a tip for installing boost 1.55 on Ubuntu 14.04: `sudo apt-get autoremove libboost1.54*` then `sudo apt-get install libboost1.55-all-dev`). 12 | 13 | Then configure `Makefile.config` and compile caffe. To use multi-GPU training, uncomment the MPI parallel block in `Makefile.config` and set `MPI_INCLUDE` and `MPI_LIB` properly. More details on using this caffe fork can be found [here](https://github.com/Cysu/caffe/tree/domain-guided-dropout). 14 | 15 | cd external/caffe 16 | cp Makefile.config.example Makefile.config 17 | # Configure the libraries properly 18 | make -j8 && make py 19 | 20 | Some other prerequisites are: 21 | 22 | 1. Matlab (to pre-process the CUHK03 dataset) 23 | 2. python2 packages: numpy, scipy, Pillow, scikit-learn, protobuf, lmdb 24 | 3. Add `export PYTHONPATH=".:$PYTHONPATH"` to `~/.bashrc` and restart the terminal 25 | 26 | ## Download datasets 27 | 28 | Download the following datasets. 29 | 30 | 1. [CUHK03](https://docs.google.com/spreadsheet/viewform?usp=drive_web&formkey=dHRkMkFVSUFvbTJIRkRDLWRwZWpONnc6MA#gid=0) 31 | 2. [CUHK01](https://docs.google.com/spreadsheet/viewform?formkey=dF9pZ1BFZkNiMG1oZUdtTjZPalR0MGc6MA) 32 | 3. [PRID](https://lrs.icg.tugraz.at/datasets/prid/prid_2011.zip) 33 | 4. [VIPeR](https://drive.google.com/open?id=0B67_d0rLRTQYdlcwd3VGV09SVHM) (I cannot find the link to the original dataset. This is my previous backup version.) 34 | 5. [3DPeS](http://imagelab.ing.unimore.it/3DPeS/3dPES_data/3DPeS_ReId_Snap.zip) 35 | 6. [i-LIDS](https://drive.google.com/file/d/0B67_d0rLRTQYRjQ2T3o1NmxvVE0/view?usp=sharing) (I cannot find the link to the original dataset. This is my previous backup version.) 36 | 7. [Shinpuhkan](http://www.mm.media.kyoto-u.ac.jp/en/datasets/shinpuhkan) (you need to email the authors) 37 | 38 | Link the root directory of these datasets to our project. 39 | 40 | ln -sf /path/to/the/root/of/datasets external/raw 41 | 42 | ## Prepare data 43 | 44 | 1. Create a directory for experiment data and results 45 | 46 | mkdir -p external/exp 47 | 48 | or link it to another external directory 49 | 50 | ln -s /path/to/your/exp/directory external/exp 51 | 52 | 2. Convert raw datasets into a uniform data format 53 | 54 | scripts/format_rawdata.sh 55 | 56 | 3. Convert formatted datasets into LMDBs 57 | 58 | scripts/make_dbs.sh 59 | 60 | 4. Merge all the datasets together for the joint single-task learning (JSTL) 61 | 62 | scripts/merge_dbs.sh 63 | 64 | ## Experiments 65 | 66 | **Note: We use two GPUs to train the models by default. Change the `mpirun -n 2 ... 
-gpu 0,1` in `scripts/routines.sh` to your own hardware configuration if necessary.** 67 | 68 | **GPU device IDs also need to be changed in:** 69 | 70 | 1. `train_model()` and `extract_features()` of `scripts/routines.sh` 71 | 2. `main()` of `tools/compute_impact_score.py` 72 | 73 | Our experiments are organized into two groups: 74 | 75 | 1. Baseline: training individually on each dataset 76 | 2. Ours: Joint single task learning (JSTL) + Domain guided dropout (DGD) 77 | 78 | We provide a **pretrained JSTL+DGD model [here](https://drive.google.com/open?id=0B67_d0rLRTQYZnB5ZUZpdTlxM0k)** that can be used as a generic person re-id feature extractor. 79 | 80 | Some archived experiment logs can be found at `archived/`. 81 | 82 | ### Baseline: training individually on each dataset 83 | 84 | To train and test a model individually on a dataset, just run the script 85 | 86 | scripts/exp_individually.sh prid 87 | 88 | where the parameter is the dataset name, which can be one of `cuhk03`, `cuhk01`, `prid`, `viper`, `3dpes`, `ilids`. 89 | 90 | ### Ours: Joint single task learning (JSTL) + Domain guided dropout (DGD) 91 | 92 | 1. Pretrain a model using the mixed dataset with JSTL. The CMC accuracies printed out correspond to the **JSTL** entries in Table 3 of our paper. 93 | 94 | scripts/exp_jstl.sh 95 | 96 | 2. Based on the pretrained JSTL model, we first compute the neuron impact scores (NIS) for each dataset, and then resume the JSTL training with deterministic DGD. The CMC accuracies printed out correspond to the **JSTL+DGD** entries in Table 3 of our paper. 97 | 98 | scripts/exp_dgd.sh 99 | 100 | 3. Finally, to achieve the best performance, we can fine-tune the model on each dataset with stochastic DGD. The CMC accuracies printed out correspond to the **FT-(JSTL+DGD)** entries in Table 3 of our paper. 101 | 102 | scripts/exp_ft_dgd.sh 103 | 104 | ## Citation 105 | 106 | @inproceedings{xiao2016learning, 107 | title={Learning Deep Feature Representations with Domain Guided Dropout for Person Re-identification}, 108 | author={Xiao, Tong and Li, Hongsheng and Ouyang, Wanli and Wang, Xiaogang}, 109 | booktitle={CVPR}, 110 | year={2016} 111 | } 112 | 113 | ## Referenced Datasets 114 | 115 | We summarize the person re-id datasets used in this project below. 
116 | 117 | | Name | Reference | 118 | |------------|------------------------------------------------------------------------------------------------------------------| 119 | | 3DPeS | Baltieri, et al., 3DPeS: 3D people dataset for surveillance and forensics | 120 | | CUHK01 | Li, et al., Human reidentification with transferred metric learning | 121 | | CUHK02 | Li, et al., Locally aligned feature transforms across views | 122 | | CUHK03 | Li, et al., DeepReID: Deep filter pairing neural network for person re-identification | 123 | | i-LIDS | Zheng, et al., Associating groups of people | 124 | | PRID | Hirzer, et al., Person re-identification by descriptive and discriminative classification | 125 | | Shinpuhkan | Kawanishi, et al., Shinpuhkan2014: A multi-camera pedestrian dataset for tracking people across multiple cameras | 126 | | VIPeR | Gray, et al., Evaluating appearance models for recognition, reacquisition, and tracking | 127 | -------------------------------------------------------------------------------- /models/jstl/jstl_deploy_inference.prototxt: -------------------------------------------------------------------------------- 1 | name: "JSTL" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 144 6 | input_dim: 56 7 | richness: 1000 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1.0 15 | decay_mult: 1.0 16 | } 17 | param { 18 | lr_mult: 2.0 19 | decay_mult: 0.0 20 | } 21 | convolution_param { 22 | num_output: 32 23 | pad: 1 24 | kernel_size: 3 25 | stride: 1 26 | weight_filler { 27 | type: "xavier" 28 | } 29 | bias_filler { 30 | type: "constant" 31 | value: 0.2 32 | } 33 | } 34 | } 35 | layer { 36 | name: "relu1" 37 | type: "ReLU" 38 | bottom: "conv1" 39 | top: "conv1" 40 | } 41 | layer { 42 | name: "conv2" 43 | type: "Convolution" 44 | bottom: "conv1" 45 | top: "conv2" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 0.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 1 57 | kernel_size: 3 58 | stride: 1 59 | weight_filler { 60 | type: "xavier" 61 | } 62 | bias_filler { 63 | type: "constant" 64 | value: 0.2 65 | } 66 | } 67 | } 68 | layer { 69 | name: "relu2" 70 | type: "ReLU" 71 | bottom: "conv2" 72 | top: "conv2" 73 | } 74 | layer { 75 | name: "conv3" 76 | type: "Convolution" 77 | bottom: "conv2" 78 | top: "conv3" 79 | param { 80 | lr_mult: 1.0 81 | decay_mult: 1.0 82 | } 83 | param { 84 | lr_mult: 2.0 85 | decay_mult: 0.0 86 | } 87 | convolution_param { 88 | num_output: 32 89 | pad: 1 90 | kernel_size: 3 91 | stride: 1 92 | weight_filler { 93 | type: "xavier" 94 | } 95 | bias_filler { 96 | type: "constant" 97 | value: 0.2 98 | } 99 | } 100 | } 101 | layer { 102 | name: "relu3" 103 | type: "ReLU" 104 | bottom: "conv3" 105 | top: "conv3" 106 | } 107 | layer { 108 | name: "pool1" 109 | type: "Pooling" 110 | bottom: "conv3" 111 | top: "pool1" 112 | pooling_param { 113 | pool: MAX 114 | kernel_size: 2 115 | stride: 2 116 | } 117 | } 118 | layer { 119 | name: "inception_1a/1x1" 120 | type: "Convolution" 121 | bottom: "pool1" 122 | top: "inception_1a/1x1" 123 | param { 124 | lr_mult: 1.0 125 | decay_mult: 1.0 126 | } 127 | param { 128 | lr_mult: 2.0 129 | decay_mult: 0.0 130 | } 131 | convolution_param { 132 | num_output: 64 133 | kernel_size: 1 134 | weight_filler { 135 | type: "xavier" 136 | } 137 | bias_filler { 138 | type: "constant" 139 | value: 0.2 140 | } 141 | } 142 | } 143 | layer { 144 | name: 
"inception_1a/relu_1x1" 145 | type: "ReLU" 146 | bottom: "inception_1a/1x1" 147 | top: "inception_1a/1x1" 148 | } 149 | layer { 150 | name: "inception_1a/3x3_reduce" 151 | type: "Convolution" 152 | bottom: "pool1" 153 | top: "inception_1a/3x3_reduce" 154 | param { 155 | lr_mult: 1.0 156 | decay_mult: 1.0 157 | } 158 | param { 159 | lr_mult: 2.0 160 | decay_mult: 0.0 161 | } 162 | convolution_param { 163 | num_output: 64 164 | kernel_size: 1 165 | weight_filler { 166 | type: "xavier" 167 | } 168 | bias_filler { 169 | type: "constant" 170 | value: 0.2 171 | } 172 | } 173 | } 174 | layer { 175 | name: "inception_1a/relu_3x3_reduce" 176 | type: "ReLU" 177 | bottom: "inception_1a/3x3_reduce" 178 | top: "inception_1a/3x3_reduce" 179 | } 180 | layer { 181 | name: "inception_1a/3x3" 182 | type: "Convolution" 183 | bottom: "inception_1a/3x3_reduce" 184 | top: "inception_1a/3x3" 185 | param { 186 | lr_mult: 1.0 187 | decay_mult: 1.0 188 | } 189 | param { 190 | lr_mult: 2.0 191 | decay_mult: 0.0 192 | } 193 | convolution_param { 194 | num_output: 64 195 | pad: 1 196 | kernel_size: 3 197 | weight_filler { 198 | type: "xavier" 199 | } 200 | bias_filler { 201 | type: "constant" 202 | value: 0.2 203 | } 204 | } 205 | } 206 | layer { 207 | name: "inception_1a/relu_3x3" 208 | type: "ReLU" 209 | bottom: "inception_1a/3x3" 210 | top: "inception_1a/3x3" 211 | } 212 | layer { 213 | name: "inception_1a/double_3x3_reduce" 214 | type: "Convolution" 215 | bottom: "pool1" 216 | top: "inception_1a/double_3x3_reduce" 217 | param { 218 | lr_mult: 1.0 219 | decay_mult: 1.0 220 | } 221 | param { 222 | lr_mult: 2.0 223 | decay_mult: 0.0 224 | } 225 | convolution_param { 226 | num_output: 64 227 | kernel_size: 1 228 | weight_filler { 229 | type: "xavier" 230 | } 231 | bias_filler { 232 | type: "constant" 233 | value: 0.2 234 | } 235 | } 236 | } 237 | layer { 238 | name: "inception_1a/relu_double_3x3_reduce" 239 | type: "ReLU" 240 | bottom: "inception_1a/double_3x3_reduce" 241 | top: "inception_1a/double_3x3_reduce" 242 | } 243 | layer { 244 | name: "inception_1a/double_3x3_1" 245 | type: "Convolution" 246 | bottom: "inception_1a/double_3x3_reduce" 247 | top: "inception_1a/double_3x3_1" 248 | param { 249 | lr_mult: 1.0 250 | decay_mult: 1.0 251 | } 252 | param { 253 | lr_mult: 2.0 254 | decay_mult: 0.0 255 | } 256 | convolution_param { 257 | num_output: 64 258 | pad: 1 259 | kernel_size: 3 260 | weight_filler { 261 | type: "xavier" 262 | } 263 | bias_filler { 264 | type: "constant" 265 | value: 0.2 266 | } 267 | } 268 | } 269 | layer { 270 | name: "inception_1a/relu_double_3x3_1" 271 | type: "ReLU" 272 | bottom: "inception_1a/double_3x3_1" 273 | top: "inception_1a/double_3x3_1" 274 | } 275 | layer { 276 | name: "inception_1a/double_3x3_2" 277 | type: "Convolution" 278 | bottom: "inception_1a/double_3x3_1" 279 | top: "inception_1a/double_3x3_2" 280 | param { 281 | lr_mult: 1.0 282 | decay_mult: 1.0 283 | } 284 | param { 285 | lr_mult: 2.0 286 | decay_mult: 0.0 287 | } 288 | convolution_param { 289 | num_output: 64 290 | pad: 1 291 | kernel_size: 3 292 | weight_filler { 293 | type: "xavier" 294 | } 295 | bias_filler { 296 | type: "constant" 297 | value: 0.2 298 | } 299 | } 300 | } 301 | layer { 302 | name: "inception_1a/relu_double_3x3_2" 303 | type: "ReLU" 304 | bottom: "inception_1a/double_3x3_2" 305 | top: "inception_1a/double_3x3_2" 306 | } 307 | layer { 308 | name: "inception_1a/pool" 309 | type: "Pooling" 310 | bottom: "pool1" 311 | top: "inception_1a/pool" 312 | pooling_param { 313 | pool: AVE 314 | kernel_size: 3 315 
| stride: 1 316 | pad: 1 317 | } 318 | } 319 | layer { 320 | name: "inception_1a/pool_proj" 321 | type: "Convolution" 322 | bottom: "inception_1a/pool" 323 | top: "inception_1a/pool_proj" 324 | param { 325 | lr_mult: 1.0 326 | decay_mult: 1.0 327 | } 328 | param { 329 | lr_mult: 2.0 330 | decay_mult: 0.0 331 | } 332 | convolution_param { 333 | num_output: 64 334 | kernel_size: 1 335 | weight_filler { 336 | type: "xavier" 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 0.2 341 | } 342 | } 343 | } 344 | layer { 345 | name: "inception_1a/relu_pool_proj" 346 | type: "ReLU" 347 | bottom: "inception_1a/pool_proj" 348 | top: "inception_1a/pool_proj" 349 | } 350 | layer { 351 | name: "inception_1a/output" 352 | type: "Concat" 353 | bottom: "inception_1a/1x1" 354 | bottom: "inception_1a/3x3" 355 | bottom: "inception_1a/double_3x3_2" 356 | bottom: "inception_1a/pool_proj" 357 | top: "inception_1a/output" 358 | } 359 | layer { 360 | name: "inception_1b/3x3_reduce" 361 | type: "Convolution" 362 | bottom: "inception_1a/output" 363 | top: "inception_1b/3x3_reduce" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1.0 367 | } 368 | param { 369 | lr_mult: 2.0 370 | decay_mult: 0.0 371 | } 372 | convolution_param { 373 | num_output: 64 374 | kernel_size: 1 375 | weight_filler { 376 | type: "xavier" 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0.2 381 | } 382 | } 383 | } 384 | layer { 385 | name: "inception_1b/relu_3x3_reduce" 386 | type: "ReLU" 387 | bottom: "inception_1b/3x3_reduce" 388 | top: "inception_1b/3x3_reduce" 389 | } 390 | layer { 391 | name: "inception_1b/3x3" 392 | type: "Convolution" 393 | bottom: "inception_1b/3x3_reduce" 394 | top: "inception_1b/3x3" 395 | param { 396 | lr_mult: 1.0 397 | decay_mult: 1.0 398 | } 399 | param { 400 | lr_mult: 2.0 401 | decay_mult: 0.0 402 | } 403 | convolution_param { 404 | num_output: 64 405 | pad: 1 406 | kernel_size: 3 407 | stride: 2 408 | weight_filler { 409 | type: "xavier" 410 | } 411 | bias_filler { 412 | type: "constant" 413 | value: 0.2 414 | } 415 | } 416 | } 417 | layer { 418 | name: "inception_1b/relu_3x3" 419 | type: "ReLU" 420 | bottom: "inception_1b/3x3" 421 | top: "inception_1b/3x3" 422 | } 423 | layer { 424 | name: "inception_1b/double_3x3_reduce" 425 | type: "Convolution" 426 | bottom: "inception_1a/output" 427 | top: "inception_1b/double_3x3_reduce" 428 | param { 429 | lr_mult: 1.0 430 | decay_mult: 1.0 431 | } 432 | param { 433 | lr_mult: 2.0 434 | decay_mult: 0.0 435 | } 436 | convolution_param { 437 | num_output: 64 438 | kernel_size: 1 439 | weight_filler { 440 | type: "xavier" 441 | } 442 | bias_filler { 443 | type: "constant" 444 | value: 0.2 445 | } 446 | } 447 | } 448 | layer { 449 | name: "inception_1b/relu_double_3x3_reduce" 450 | type: "ReLU" 451 | bottom: "inception_1b/double_3x3_reduce" 452 | top: "inception_1b/double_3x3_reduce" 453 | } 454 | layer { 455 | name: "inception_1b/double_3x3_1" 456 | type: "Convolution" 457 | bottom: "inception_1b/double_3x3_reduce" 458 | top: "inception_1b/double_3x3_1" 459 | param { 460 | lr_mult: 1.0 461 | decay_mult: 1.0 462 | } 463 | param { 464 | lr_mult: 2.0 465 | decay_mult: 0.0 466 | } 467 | convolution_param { 468 | num_output: 64 469 | pad: 1 470 | kernel_size: 3 471 | weight_filler { 472 | type: "xavier" 473 | } 474 | bias_filler { 475 | type: "constant" 476 | value: 0.2 477 | } 478 | } 479 | } 480 | layer { 481 | name: "inception_1b/relu_double_3x3_1" 482 | type: "ReLU" 483 | bottom: "inception_1b/double_3x3_1" 484 | top: "inception_1b/double_3x3_1" 
485 | } 486 | layer { 487 | name: "inception_1b/double_3x3_2" 488 | type: "Convolution" 489 | bottom: "inception_1b/double_3x3_1" 490 | top: "inception_1b/double_3x3_2" 491 | param { 492 | lr_mult: 1.0 493 | decay_mult: 1.0 494 | } 495 | param { 496 | lr_mult: 2.0 497 | decay_mult: 0.0 498 | } 499 | convolution_param { 500 | num_output: 64 501 | pad: 1 502 | kernel_size: 3 503 | stride: 2 504 | weight_filler { 505 | type: "xavier" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.2 510 | } 511 | } 512 | } 513 | layer { 514 | name: "inception_1b/relu_double_3x3_2" 515 | type: "ReLU" 516 | bottom: "inception_1b/double_3x3_2" 517 | top: "inception_1b/double_3x3_2" 518 | } 519 | layer { 520 | name: "inception_1b/pool" 521 | type: "Pooling" 522 | bottom: "inception_1a/output" 523 | top: "inception_1b/pool" 524 | pooling_param { 525 | pool: MAX 526 | kernel_size: 3 527 | stride: 2 528 | } 529 | } 530 | layer { 531 | name: "inception_1b/output" 532 | type: "Concat" 533 | bottom: "inception_1b/3x3" 534 | bottom: "inception_1b/double_3x3_2" 535 | bottom: "inception_1b/pool" 536 | top: "inception_1b/output" 537 | } 538 | layer { 539 | name: "inception_2a/1x1" 540 | type: "Convolution" 541 | bottom: "inception_1b/output" 542 | top: "inception_2a/1x1" 543 | param { 544 | lr_mult: 1.0 545 | decay_mult: 1.0 546 | } 547 | param { 548 | lr_mult: 2.0 549 | decay_mult: 0.0 550 | } 551 | convolution_param { 552 | num_output: 128 553 | kernel_size: 1 554 | weight_filler { 555 | type: "xavier" 556 | } 557 | bias_filler { 558 | type: "constant" 559 | value: 0.2 560 | } 561 | } 562 | } 563 | layer { 564 | name: "inception_2a/relu_1x1" 565 | type: "ReLU" 566 | bottom: "inception_2a/1x1" 567 | top: "inception_2a/1x1" 568 | } 569 | layer { 570 | name: "inception_2a/3x3_reduce" 571 | type: "Convolution" 572 | bottom: "inception_1b/output" 573 | top: "inception_2a/3x3_reduce" 574 | param { 575 | lr_mult: 1.0 576 | decay_mult: 1.0 577 | } 578 | param { 579 | lr_mult: 2.0 580 | decay_mult: 0.0 581 | } 582 | convolution_param { 583 | num_output: 128 584 | kernel_size: 1 585 | weight_filler { 586 | type: "xavier" 587 | } 588 | bias_filler { 589 | type: "constant" 590 | value: 0.2 591 | } 592 | } 593 | } 594 | layer { 595 | name: "inception_2a/relu_3x3_reduce" 596 | type: "ReLU" 597 | bottom: "inception_2a/3x3_reduce" 598 | top: "inception_2a/3x3_reduce" 599 | } 600 | layer { 601 | name: "inception_2a/3x3" 602 | type: "Convolution" 603 | bottom: "inception_2a/3x3_reduce" 604 | top: "inception_2a/3x3" 605 | param { 606 | lr_mult: 1.0 607 | decay_mult: 1.0 608 | } 609 | param { 610 | lr_mult: 2.0 611 | decay_mult: 0.0 612 | } 613 | convolution_param { 614 | num_output: 128 615 | pad: 1 616 | kernel_size: 3 617 | weight_filler { 618 | type: "xavier" 619 | } 620 | bias_filler { 621 | type: "constant" 622 | value: 0.2 623 | } 624 | } 625 | } 626 | layer { 627 | name: "inception_2a/relu_3x3" 628 | type: "ReLU" 629 | bottom: "inception_2a/3x3" 630 | top: "inception_2a/3x3" 631 | } 632 | layer { 633 | name: "inception_2a/double_3x3_reduce" 634 | type: "Convolution" 635 | bottom: "inception_1b/output" 636 | top: "inception_2a/double_3x3_reduce" 637 | param { 638 | lr_mult: 1.0 639 | decay_mult: 1.0 640 | } 641 | param { 642 | lr_mult: 2.0 643 | decay_mult: 0.0 644 | } 645 | convolution_param { 646 | num_output: 128 647 | kernel_size: 1 648 | weight_filler { 649 | type: "xavier" 650 | } 651 | bias_filler { 652 | type: "constant" 653 | value: 0.2 654 | } 655 | } 656 | } 657 | layer { 658 | name: 
"inception_2a/relu_double_3x3_reduce" 659 | type: "ReLU" 660 | bottom: "inception_2a/double_3x3_reduce" 661 | top: "inception_2a/double_3x3_reduce" 662 | } 663 | layer { 664 | name: "inception_2a/double_3x3_1" 665 | type: "Convolution" 666 | bottom: "inception_2a/double_3x3_reduce" 667 | top: "inception_2a/double_3x3_1" 668 | param { 669 | lr_mult: 1.0 670 | decay_mult: 1.0 671 | } 672 | param { 673 | lr_mult: 2.0 674 | decay_mult: 0.0 675 | } 676 | convolution_param { 677 | num_output: 128 678 | pad: 1 679 | kernel_size: 3 680 | weight_filler { 681 | type: "xavier" 682 | } 683 | bias_filler { 684 | type: "constant" 685 | value: 0.2 686 | } 687 | } 688 | } 689 | layer { 690 | name: "inception_2a/relu_double_3x3_1" 691 | type: "ReLU" 692 | bottom: "inception_2a/double_3x3_1" 693 | top: "inception_2a/double_3x3_1" 694 | } 695 | layer { 696 | name: "inception_2a/double_3x3_2" 697 | type: "Convolution" 698 | bottom: "inception_2a/double_3x3_1" 699 | top: "inception_2a/double_3x3_2" 700 | param { 701 | lr_mult: 1.0 702 | decay_mult: 1.0 703 | } 704 | param { 705 | lr_mult: 2.0 706 | decay_mult: 0.0 707 | } 708 | convolution_param { 709 | num_output: 128 710 | pad: 1 711 | kernel_size: 3 712 | weight_filler { 713 | type: "xavier" 714 | } 715 | bias_filler { 716 | type: "constant" 717 | value: 0.2 718 | } 719 | } 720 | } 721 | layer { 722 | name: "inception_2a/relu_double_3x3_2" 723 | type: "ReLU" 724 | bottom: "inception_2a/double_3x3_2" 725 | top: "inception_2a/double_3x3_2" 726 | } 727 | layer { 728 | name: "inception_2a/pool" 729 | type: "Pooling" 730 | bottom: "inception_1b/output" 731 | top: "inception_2a/pool" 732 | pooling_param { 733 | pool: AVE 734 | kernel_size: 3 735 | stride: 1 736 | pad: 1 737 | } 738 | } 739 | layer { 740 | name: "inception_2a/pool_proj" 741 | type: "Convolution" 742 | bottom: "inception_2a/pool" 743 | top: "inception_2a/pool_proj" 744 | param { 745 | lr_mult: 1.0 746 | decay_mult: 1.0 747 | } 748 | param { 749 | lr_mult: 2.0 750 | decay_mult: 0.0 751 | } 752 | convolution_param { 753 | num_output: 128 754 | kernel_size: 1 755 | weight_filler { 756 | type: "xavier" 757 | } 758 | bias_filler { 759 | type: "constant" 760 | value: 0.2 761 | } 762 | } 763 | } 764 | layer { 765 | name: "inception_2a/relu_pool_proj" 766 | type: "ReLU" 767 | bottom: "inception_2a/pool_proj" 768 | top: "inception_2a/pool_proj" 769 | } 770 | layer { 771 | name: "inception_2a/output" 772 | type: "Concat" 773 | bottom: "inception_2a/1x1" 774 | bottom: "inception_2a/3x3" 775 | bottom: "inception_2a/double_3x3_2" 776 | bottom: "inception_2a/pool_proj" 777 | top: "inception_2a/output" 778 | } 779 | layer { 780 | name: "inception_2b/3x3_reduce" 781 | type: "Convolution" 782 | bottom: "inception_2a/output" 783 | top: "inception_2b/3x3_reduce" 784 | param { 785 | lr_mult: 1.0 786 | decay_mult: 1.0 787 | } 788 | param { 789 | lr_mult: 2.0 790 | decay_mult: 0.0 791 | } 792 | convolution_param { 793 | num_output: 128 794 | kernel_size: 1 795 | weight_filler { 796 | type: "xavier" 797 | } 798 | bias_filler { 799 | type: "constant" 800 | value: 0.2 801 | } 802 | } 803 | } 804 | layer { 805 | name: "inception_2b/relu_3x3_reduce" 806 | type: "ReLU" 807 | bottom: "inception_2b/3x3_reduce" 808 | top: "inception_2b/3x3_reduce" 809 | } 810 | layer { 811 | name: "inception_2b/3x3" 812 | type: "Convolution" 813 | bottom: "inception_2b/3x3_reduce" 814 | top: "inception_2b/3x3" 815 | param { 816 | lr_mult: 1.0 817 | decay_mult: 1.0 818 | } 819 | param { 820 | lr_mult: 2.0 821 | decay_mult: 0.0 822 | } 823 | 
convolution_param { 824 | num_output: 128 825 | pad: 1 826 | kernel_size: 3 827 | stride: 2 828 | weight_filler { 829 | type: "xavier" 830 | } 831 | bias_filler { 832 | type: "constant" 833 | value: 0.2 834 | } 835 | } 836 | } 837 | layer { 838 | name: "inception_2b/relu_3x3" 839 | type: "ReLU" 840 | bottom: "inception_2b/3x3" 841 | top: "inception_2b/3x3" 842 | } 843 | layer { 844 | name: "inception_2b/double_3x3_reduce" 845 | type: "Convolution" 846 | bottom: "inception_2a/output" 847 | top: "inception_2b/double_3x3_reduce" 848 | param { 849 | lr_mult: 1.0 850 | decay_mult: 1.0 851 | } 852 | param { 853 | lr_mult: 2.0 854 | decay_mult: 0.0 855 | } 856 | convolution_param { 857 | num_output: 128 858 | kernel_size: 1 859 | weight_filler { 860 | type: "xavier" 861 | } 862 | bias_filler { 863 | type: "constant" 864 | value: 0.2 865 | } 866 | } 867 | } 868 | layer { 869 | name: "inception_2b/relu_double_3x3_reduce" 870 | type: "ReLU" 871 | bottom: "inception_2b/double_3x3_reduce" 872 | top: "inception_2b/double_3x3_reduce" 873 | } 874 | layer { 875 | name: "inception_2b/double_3x3_1" 876 | type: "Convolution" 877 | bottom: "inception_2b/double_3x3_reduce" 878 | top: "inception_2b/double_3x3_1" 879 | param { 880 | lr_mult: 1.0 881 | decay_mult: 1.0 882 | } 883 | param { 884 | lr_mult: 2.0 885 | decay_mult: 0.0 886 | } 887 | convolution_param { 888 | num_output: 128 889 | pad: 1 890 | kernel_size: 3 891 | weight_filler { 892 | type: "xavier" 893 | } 894 | bias_filler { 895 | type: "constant" 896 | value: 0.2 897 | } 898 | } 899 | } 900 | layer { 901 | name: "inception_2b/relu_double_3x3_1" 902 | type: "ReLU" 903 | bottom: "inception_2b/double_3x3_1" 904 | top: "inception_2b/double_3x3_1" 905 | } 906 | layer { 907 | name: "inception_2b/double_3x3_2" 908 | type: "Convolution" 909 | bottom: "inception_2b/double_3x3_1" 910 | top: "inception_2b/double_3x3_2" 911 | param { 912 | lr_mult: 1.0 913 | decay_mult: 1.0 914 | } 915 | param { 916 | lr_mult: 2.0 917 | decay_mult: 0.0 918 | } 919 | convolution_param { 920 | num_output: 128 921 | pad: 1 922 | kernel_size: 3 923 | stride: 2 924 | weight_filler { 925 | type: "xavier" 926 | } 927 | bias_filler { 928 | type: "constant" 929 | value: 0.2 930 | } 931 | } 932 | } 933 | layer { 934 | name: "inception_2b/relu_double_3x3_2" 935 | type: "ReLU" 936 | bottom: "inception_2b/double_3x3_2" 937 | top: "inception_2b/double_3x3_2" 938 | } 939 | layer { 940 | name: "inception_2b/pool" 941 | type: "Pooling" 942 | bottom: "inception_2a/output" 943 | top: "inception_2b/pool" 944 | pooling_param { 945 | pool: MAX 946 | kernel_size: 3 947 | stride: 2 948 | } 949 | } 950 | layer { 951 | name: "inception_2b/output" 952 | type: "Concat" 953 | bottom: "inception_2b/3x3" 954 | bottom: "inception_2b/double_3x3_2" 955 | bottom: "inception_2b/pool" 956 | top: "inception_2b/output" 957 | } 958 | layer { 959 | name: "inception_3a/1x1" 960 | type: "Convolution" 961 | bottom: "inception_2b/output" 962 | top: "inception_3a/1x1" 963 | param { 964 | lr_mult: 1.0 965 | decay_mult: 1.0 966 | } 967 | param { 968 | lr_mult: 2.0 969 | decay_mult: 0.0 970 | } 971 | convolution_param { 972 | num_output: 256 973 | kernel_size: 1 974 | weight_filler { 975 | type: "xavier" 976 | } 977 | bias_filler { 978 | type: "constant" 979 | value: 0.2 980 | } 981 | } 982 | } 983 | layer { 984 | name: "inception_3a/relu_1x1" 985 | type: "ReLU" 986 | bottom: "inception_3a/1x1" 987 | top: "inception_3a/1x1" 988 | } 989 | layer { 990 | name: "inception_3a/3x3_reduce" 991 | type: "Convolution" 992 | bottom: 
"inception_2b/output" 993 | top: "inception_3a/3x3_reduce" 994 | param { 995 | lr_mult: 1.0 996 | decay_mult: 1.0 997 | } 998 | param { 999 | lr_mult: 2.0 1000 | decay_mult: 0.0 1001 | } 1002 | convolution_param { 1003 | num_output: 256 1004 | kernel_size: 1 1005 | weight_filler { 1006 | type: "xavier" 1007 | } 1008 | bias_filler { 1009 | type: "constant" 1010 | value: 0.2 1011 | } 1012 | } 1013 | } 1014 | layer { 1015 | name: "inception_3a/relu_3x3_reduce" 1016 | type: "ReLU" 1017 | bottom: "inception_3a/3x3_reduce" 1018 | top: "inception_3a/3x3_reduce" 1019 | } 1020 | layer { 1021 | name: "inception_3a/3x3" 1022 | type: "Convolution" 1023 | bottom: "inception_3a/3x3_reduce" 1024 | top: "inception_3a/3x3" 1025 | param { 1026 | lr_mult: 1.0 1027 | decay_mult: 1.0 1028 | } 1029 | param { 1030 | lr_mult: 2.0 1031 | decay_mult: 0.0 1032 | } 1033 | convolution_param { 1034 | num_output: 256 1035 | pad: 1 1036 | kernel_size: 3 1037 | weight_filler { 1038 | type: "xavier" 1039 | } 1040 | bias_filler { 1041 | type: "constant" 1042 | value: 0.2 1043 | } 1044 | } 1045 | } 1046 | layer { 1047 | name: "inception_3a/relu_3x3" 1048 | type: "ReLU" 1049 | bottom: "inception_3a/3x3" 1050 | top: "inception_3a/3x3" 1051 | } 1052 | layer { 1053 | name: "inception_3a/double_3x3_reduce" 1054 | type: "Convolution" 1055 | bottom: "inception_2b/output" 1056 | top: "inception_3a/double_3x3_reduce" 1057 | param { 1058 | lr_mult: 1.0 1059 | decay_mult: 1.0 1060 | } 1061 | param { 1062 | lr_mult: 2.0 1063 | decay_mult: 0.0 1064 | } 1065 | convolution_param { 1066 | num_output: 256 1067 | kernel_size: 1 1068 | weight_filler { 1069 | type: "xavier" 1070 | } 1071 | bias_filler { 1072 | type: "constant" 1073 | value: 0.2 1074 | } 1075 | } 1076 | } 1077 | layer { 1078 | name: "inception_3a/relu_double_3x3_reduce" 1079 | type: "ReLU" 1080 | bottom: "inception_3a/double_3x3_reduce" 1081 | top: "inception_3a/double_3x3_reduce" 1082 | } 1083 | layer { 1084 | name: "inception_3a/double_3x3_1" 1085 | type: "Convolution" 1086 | bottom: "inception_3a/double_3x3_reduce" 1087 | top: "inception_3a/double_3x3_1" 1088 | param { 1089 | lr_mult: 1.0 1090 | decay_mult: 1.0 1091 | } 1092 | param { 1093 | lr_mult: 2.0 1094 | decay_mult: 0.0 1095 | } 1096 | convolution_param { 1097 | num_output: 256 1098 | pad: 1 1099 | kernel_size: 3 1100 | weight_filler { 1101 | type: "xavier" 1102 | } 1103 | bias_filler { 1104 | type: "constant" 1105 | value: 0.2 1106 | } 1107 | } 1108 | } 1109 | layer { 1110 | name: "inception_3a/relu_double_3x3_1" 1111 | type: "ReLU" 1112 | bottom: "inception_3a/double_3x3_1" 1113 | top: "inception_3a/double_3x3_1" 1114 | } 1115 | layer { 1116 | name: "inception_3a/double_3x3_2" 1117 | type: "Convolution" 1118 | bottom: "inception_3a/double_3x3_1" 1119 | top: "inception_3a/double_3x3_2" 1120 | param { 1121 | lr_mult: 1.0 1122 | decay_mult: 1.0 1123 | } 1124 | param { 1125 | lr_mult: 2.0 1126 | decay_mult: 0.0 1127 | } 1128 | convolution_param { 1129 | num_output: 256 1130 | pad: 1 1131 | kernel_size: 3 1132 | weight_filler { 1133 | type: "xavier" 1134 | } 1135 | bias_filler { 1136 | type: "constant" 1137 | value: 0.2 1138 | } 1139 | } 1140 | } 1141 | layer { 1142 | name: "inception_3a/relu_double_3x3_2" 1143 | type: "ReLU" 1144 | bottom: "inception_3a/double_3x3_2" 1145 | top: "inception_3a/double_3x3_2" 1146 | } 1147 | layer { 1148 | name: "inception_3a/pool" 1149 | type: "Pooling" 1150 | bottom: "inception_2b/output" 1151 | top: "inception_3a/pool" 1152 | pooling_param { 1153 | pool: AVE 1154 | kernel_size: 3 1155 | 
stride: 1 1156 | pad: 1 1157 | } 1158 | } 1159 | layer { 1160 | name: "inception_3a/pool_proj" 1161 | type: "Convolution" 1162 | bottom: "inception_3a/pool" 1163 | top: "inception_3a/pool_proj" 1164 | param { 1165 | lr_mult: 1.0 1166 | decay_mult: 1.0 1167 | } 1168 | param { 1169 | lr_mult: 2.0 1170 | decay_mult: 0.0 1171 | } 1172 | convolution_param { 1173 | num_output: 256 1174 | kernel_size: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0.2 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "inception_3a/relu_pool_proj" 1186 | type: "ReLU" 1187 | bottom: "inception_3a/pool_proj" 1188 | top: "inception_3a/pool_proj" 1189 | } 1190 | layer { 1191 | name: "inception_3a/output" 1192 | type: "Concat" 1193 | bottom: "inception_3a/1x1" 1194 | bottom: "inception_3a/3x3" 1195 | bottom: "inception_3a/double_3x3_2" 1196 | bottom: "inception_3a/pool_proj" 1197 | top: "inception_3a/output" 1198 | } 1199 | layer { 1200 | name: "inception_3b/3x3_reduce" 1201 | type: "Convolution" 1202 | bottom: "inception_3a/output" 1203 | top: "inception_3b/3x3_reduce" 1204 | param { 1205 | lr_mult: 1.0 1206 | decay_mult: 1.0 1207 | } 1208 | param { 1209 | lr_mult: 2.0 1210 | decay_mult: 0.0 1211 | } 1212 | convolution_param { 1213 | num_output: 256 1214 | kernel_size: 1 1215 | weight_filler { 1216 | type: "xavier" 1217 | } 1218 | bias_filler { 1219 | type: "constant" 1220 | value: 0.2 1221 | } 1222 | } 1223 | } 1224 | layer { 1225 | name: "inception_3b/relu_3x3_reduce" 1226 | type: "ReLU" 1227 | bottom: "inception_3b/3x3_reduce" 1228 | top: "inception_3b/3x3_reduce" 1229 | } 1230 | layer { 1231 | name: "inception_3b/3x3" 1232 | type: "Convolution" 1233 | bottom: "inception_3b/3x3_reduce" 1234 | top: "inception_3b/3x3" 1235 | param { 1236 | lr_mult: 1.0 1237 | decay_mult: 1.0 1238 | } 1239 | param { 1240 | lr_mult: 2.0 1241 | decay_mult: 0.0 1242 | } 1243 | convolution_param { 1244 | num_output: 256 1245 | pad: 1 1246 | kernel_size: 3 1247 | stride: 2 1248 | weight_filler { 1249 | type: "xavier" 1250 | } 1251 | bias_filler { 1252 | type: "constant" 1253 | value: 0.2 1254 | } 1255 | } 1256 | } 1257 | layer { 1258 | name: "inception_3b/relu_3x3" 1259 | type: "ReLU" 1260 | bottom: "inception_3b/3x3" 1261 | top: "inception_3b/3x3" 1262 | } 1263 | layer { 1264 | name: "inception_3b/double_3x3_reduce" 1265 | type: "Convolution" 1266 | bottom: "inception_3a/output" 1267 | top: "inception_3b/double_3x3_reduce" 1268 | param { 1269 | lr_mult: 1.0 1270 | decay_mult: 1.0 1271 | } 1272 | param { 1273 | lr_mult: 2.0 1274 | decay_mult: 0.0 1275 | } 1276 | convolution_param { 1277 | num_output: 256 1278 | kernel_size: 1 1279 | weight_filler { 1280 | type: "xavier" 1281 | } 1282 | bias_filler { 1283 | type: "constant" 1284 | value: 0.2 1285 | } 1286 | } 1287 | } 1288 | layer { 1289 | name: "inception_3b/relu_double_3x3_reduce" 1290 | type: "ReLU" 1291 | bottom: "inception_3b/double_3x3_reduce" 1292 | top: "inception_3b/double_3x3_reduce" 1293 | } 1294 | layer { 1295 | name: "inception_3b/double_3x3_1" 1296 | type: "Convolution" 1297 | bottom: "inception_3b/double_3x3_reduce" 1298 | top: "inception_3b/double_3x3_1" 1299 | param { 1300 | lr_mult: 1.0 1301 | decay_mult: 1.0 1302 | } 1303 | param { 1304 | lr_mult: 2.0 1305 | decay_mult: 0.0 1306 | } 1307 | convolution_param { 1308 | num_output: 256 1309 | pad: 1 1310 | kernel_size: 3 1311 | weight_filler { 1312 | type: "xavier" 1313 | } 1314 | bias_filler { 1315 | type: "constant" 1316 | value: 0.2 1317 | } 1318 | } 
1319 | } 1320 | layer { 1321 | name: "inception_3b/relu_double_3x3_1" 1322 | type: "ReLU" 1323 | bottom: "inception_3b/double_3x3_1" 1324 | top: "inception_3b/double_3x3_1" 1325 | } 1326 | layer { 1327 | name: "inception_3b/double_3x3_2" 1328 | type: "Convolution" 1329 | bottom: "inception_3b/double_3x3_1" 1330 | top: "inception_3b/double_3x3_2" 1331 | param { 1332 | lr_mult: 1.0 1333 | decay_mult: 1.0 1334 | } 1335 | param { 1336 | lr_mult: 2.0 1337 | decay_mult: 0.0 1338 | } 1339 | convolution_param { 1340 | num_output: 256 1341 | pad: 1 1342 | kernel_size: 3 1343 | stride: 2 1344 | weight_filler { 1345 | type: "xavier" 1346 | } 1347 | bias_filler { 1348 | type: "constant" 1349 | value: 0.2 1350 | } 1351 | } 1352 | } 1353 | layer { 1354 | name: "inception_3b/relu_double_3x3_2" 1355 | type: "ReLU" 1356 | bottom: "inception_3b/double_3x3_2" 1357 | top: "inception_3b/double_3x3_2" 1358 | } 1359 | layer { 1360 | name: "inception_3b/pool" 1361 | type: "Pooling" 1362 | bottom: "inception_3a/output" 1363 | top: "inception_3b/pool" 1364 | pooling_param { 1365 | pool: MAX 1366 | kernel_size: 3 1367 | stride: 2 1368 | pad_h: 0 1369 | pad_w: 1 1370 | } 1371 | } 1372 | layer { 1373 | name: "inception_3b/output" 1374 | type: "Concat" 1375 | bottom: "inception_3b/3x3" 1376 | bottom: "inception_3b/double_3x3_2" 1377 | bottom: "inception_3b/pool" 1378 | top: "inception_3b/output" 1379 | } 1380 | layer { 1381 | name: "global_pool" 1382 | type: "Pooling" 1383 | bottom: "inception_3b/output" 1384 | top: "global_pool" 1385 | pooling_param { 1386 | pool: AVE 1387 | stride: 1 1388 | kernel_h: 9 1389 | kernel_w: 4 1390 | } 1391 | } 1392 | layer { 1393 | name: "fc7" 1394 | type: "InnerProduct" 1395 | bottom: "global_pool" 1396 | top: "fc7" 1397 | param { 1398 | lr_mult: 1.0 1399 | decay_mult: 1.0 1400 | } 1401 | param { 1402 | lr_mult: 2.0 1403 | decay_mult: 0.0 1404 | } 1405 | inner_product_param { 1406 | num_output: 256 1407 | weight_filler { 1408 | type: "xavier" 1409 | } 1410 | bias_filler { 1411 | type: "constant" 1412 | value: 0.0 1413 | } 1414 | } 1415 | } 1416 | layer { 1417 | name: "relu7" 1418 | type: "ReLU" 1419 | bottom: "fc7" 1420 | top: "fc7" 1421 | } 1422 | layer { 1423 | name: "drop7" 1424 | type: "Dropout" 1425 | bottom: "fc7" 1426 | top: "fc7" 1427 | dropout_param { 1428 | dropout_ratio: 0.5 1429 | } 1430 | } 1431 | layer { 1432 | name: "fc8_jstl" 1433 | type: "InnerProduct" 1434 | bottom: "fc7" 1435 | top: "fc8_jstl" 1436 | param { 1437 | lr_mult: 1.0 1438 | decay_mult: 1.0 1439 | } 1440 | param { 1441 | lr_mult: 2.0 1442 | decay_mult: 0.0 1443 | } 1444 | inner_product_param { 1445 | num_output: 2629 1446 | weight_filler { 1447 | type: "gaussian" 1448 | std: 0.001 1449 | } 1450 | bias_filler { 1451 | type: "constant" 1452 | value: 0.0 1453 | } 1454 | } 1455 | } 1456 | -------------------------------------------------------------------------------- /models/fc_only/3dpes_trainval.prototxt: -------------------------------------------------------------------------------- 1 | name: "3DPeS" 2 | richness: 1000 3 | 4 | ##################################### data ##################################### 5 | layer { name: "data" type: "Data" top: "data" top: "label" 6 | data_param { backend: LMDB batch_size: 100 shuffle_pool_size: 10 7 | source: "external/exp/db/3dpes/train_lmdb" } 8 | transform_param { mirror: true crop_height: 144 crop_width: 56 9 | mean_value: 102 mean_value: 102 mean_value: 101 } 10 | include: { phase: TRAIN } } 11 | layer { name: "data" type: "Data" top: "data" top: "label" 12 | 
data_param { backend: LMDB batch_size: 20 13 | source: "external/exp/db/3dpes/val_lmdb" } 14 | transform_param { mirror: false crop_height: 144 crop_width: 56 15 | mean_value: 102 mean_value: 102 mean_value: 101 } 16 | include: { phase: TEST } } 17 | 18 | ################################ fixed layers ################################## 19 | layer { 20 | name: "conv1" 21 | type: "Convolution" 22 | bottom: "data" 23 | top: "conv1" 24 | param { 25 | lr_mult: 0 26 | decay_mult: 0 27 | } 28 | param { 29 | lr_mult: 0 30 | decay_mult: 0 31 | } 32 | convolution_param { 33 | num_output: 32 34 | pad: 1 35 | kernel_size: 3 36 | stride: 1 37 | weight_filler { 38 | type: "xavier" 39 | } 40 | bias_filler { 41 | type: "constant" 42 | value: 0.2 43 | } 44 | } 45 | } 46 | layer { 47 | name: "relu1" 48 | type: "ReLU" 49 | bottom: "conv1" 50 | top: "conv1" 51 | } 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "conv1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 0 59 | decay_mult: 0 60 | } 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 32 67 | pad: 1 68 | kernel_size: 3 69 | stride: 1 70 | weight_filler { 71 | type: "xavier" 72 | } 73 | bias_filler { 74 | type: "constant" 75 | value: 0.2 76 | } 77 | } 78 | } 79 | layer { 80 | name: "relu2" 81 | type: "ReLU" 82 | bottom: "conv2" 83 | top: "conv2" 84 | } 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 0 92 | decay_mult: 0 93 | } 94 | param { 95 | lr_mult: 0 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | pad: 1 101 | kernel_size: 3 102 | stride: 1 103 | weight_filler { 104 | type: "xavier" 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 0.2 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu3" 114 | type: "ReLU" 115 | bottom: "conv3" 116 | top: "conv3" 117 | } 118 | layer { 119 | name: "pool1" 120 | type: "Pooling" 121 | bottom: "conv3" 122 | top: "pool1" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 2 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "inception_1a/1x1" 131 | type: "Convolution" 132 | bottom: "pool1" 133 | top: "inception_1a/1x1" 134 | param { 135 | lr_mult: 0 136 | decay_mult: 0 137 | } 138 | param { 139 | lr_mult: 0 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 64 144 | kernel_size: 1 145 | weight_filler { 146 | type: "xavier" 147 | } 148 | bias_filler { 149 | type: "constant" 150 | value: 0.2 151 | } 152 | } 153 | } 154 | layer { 155 | name: "inception_1a/relu_1x1" 156 | type: "ReLU" 157 | bottom: "inception_1a/1x1" 158 | top: "inception_1a/1x1" 159 | } 160 | layer { 161 | name: "inception_1a/3x3_reduce" 162 | type: "Convolution" 163 | bottom: "pool1" 164 | top: "inception_1a/3x3_reduce" 165 | param { 166 | lr_mult: 0 167 | decay_mult: 0 168 | } 169 | param { 170 | lr_mult: 0 171 | decay_mult: 0 172 | } 173 | convolution_param { 174 | num_output: 64 175 | kernel_size: 1 176 | weight_filler { 177 | type: "xavier" 178 | } 179 | bias_filler { 180 | type: "constant" 181 | value: 0.2 182 | } 183 | } 184 | } 185 | layer { 186 | name: "inception_1a/relu_3x3_reduce" 187 | type: "ReLU" 188 | bottom: "inception_1a/3x3_reduce" 189 | top: "inception_1a/3x3_reduce" 190 | } 191 | layer { 192 | name: "inception_1a/3x3" 193 | type: "Convolution" 194 | bottom: "inception_1a/3x3_reduce" 195 | top: "inception_1a/3x3" 196 | param { 197 | lr_mult: 0 198 | decay_mult: 0 199 | } 200 | param { 201 | lr_mult: 0 202 | decay_mult: 
0 203 | } 204 | convolution_param { 205 | num_output: 64 206 | pad: 1 207 | kernel_size: 3 208 | weight_filler { 209 | type: "xavier" 210 | } 211 | bias_filler { 212 | type: "constant" 213 | value: 0.2 214 | } 215 | } 216 | } 217 | layer { 218 | name: "inception_1a/relu_3x3" 219 | type: "ReLU" 220 | bottom: "inception_1a/3x3" 221 | top: "inception_1a/3x3" 222 | } 223 | layer { 224 | name: "inception_1a/double_3x3_reduce" 225 | type: "Convolution" 226 | bottom: "pool1" 227 | top: "inception_1a/double_3x3_reduce" 228 | param { 229 | lr_mult: 0 230 | decay_mult: 0 231 | } 232 | param { 233 | lr_mult: 0 234 | decay_mult: 0 235 | } 236 | convolution_param { 237 | num_output: 64 238 | kernel_size: 1 239 | weight_filler { 240 | type: "xavier" 241 | } 242 | bias_filler { 243 | type: "constant" 244 | value: 0.2 245 | } 246 | } 247 | } 248 | layer { 249 | name: "inception_1a/relu_double_3x3_reduce" 250 | type: "ReLU" 251 | bottom: "inception_1a/double_3x3_reduce" 252 | top: "inception_1a/double_3x3_reduce" 253 | } 254 | layer { 255 | name: "inception_1a/double_3x3_1" 256 | type: "Convolution" 257 | bottom: "inception_1a/double_3x3_reduce" 258 | top: "inception_1a/double_3x3_1" 259 | param { 260 | lr_mult: 0 261 | decay_mult: 0 262 | } 263 | param { 264 | lr_mult: 0 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 64 269 | pad: 1 270 | kernel_size: 3 271 | weight_filler { 272 | type: "xavier" 273 | } 274 | bias_filler { 275 | type: "constant" 276 | value: 0.2 277 | } 278 | } 279 | } 280 | layer { 281 | name: "inception_1a/relu_double_3x3_1" 282 | type: "ReLU" 283 | bottom: "inception_1a/double_3x3_1" 284 | top: "inception_1a/double_3x3_1" 285 | } 286 | layer { 287 | name: "inception_1a/double_3x3_2" 288 | type: "Convolution" 289 | bottom: "inception_1a/double_3x3_1" 290 | top: "inception_1a/double_3x3_2" 291 | param { 292 | lr_mult: 0 293 | decay_mult: 0 294 | } 295 | param { 296 | lr_mult: 0 297 | decay_mult: 0 298 | } 299 | convolution_param { 300 | num_output: 64 301 | pad: 1 302 | kernel_size: 3 303 | weight_filler { 304 | type: "xavier" 305 | } 306 | bias_filler { 307 | type: "constant" 308 | value: 0.2 309 | } 310 | } 311 | } 312 | layer { 313 | name: "inception_1a/relu_double_3x3_2" 314 | type: "ReLU" 315 | bottom: "inception_1a/double_3x3_2" 316 | top: "inception_1a/double_3x3_2" 317 | } 318 | layer { 319 | name: "inception_1a/pool" 320 | type: "Pooling" 321 | bottom: "pool1" 322 | top: "inception_1a/pool" 323 | pooling_param { 324 | pool: AVE 325 | kernel_size: 3 326 | stride: 1 327 | pad: 1 328 | } 329 | } 330 | layer { 331 | name: "inception_1a/pool_proj" 332 | type: "Convolution" 333 | bottom: "inception_1a/pool" 334 | top: "inception_1a/pool_proj" 335 | param { 336 | lr_mult: 0 337 | decay_mult: 0 338 | } 339 | param { 340 | lr_mult: 0 341 | decay_mult: 0 342 | } 343 | convolution_param { 344 | num_output: 64 345 | kernel_size: 1 346 | weight_filler { 347 | type: "xavier" 348 | } 349 | bias_filler { 350 | type: "constant" 351 | value: 0.2 352 | } 353 | } 354 | } 355 | layer { 356 | name: "inception_1a/relu_pool_proj" 357 | type: "ReLU" 358 | bottom: "inception_1a/pool_proj" 359 | top: "inception_1a/pool_proj" 360 | } 361 | layer { 362 | name: "inception_1a/output" 363 | type: "Concat" 364 | bottom: "inception_1a/1x1" 365 | bottom: "inception_1a/3x3" 366 | bottom: "inception_1a/double_3x3_2" 367 | bottom: "inception_1a/pool_proj" 368 | top: "inception_1a/output" 369 | } 370 | layer { 371 | name: "inception_1b/3x3_reduce" 372 | type: "Convolution" 373 | bottom: 
"inception_1a/output" 374 | top: "inception_1b/3x3_reduce" 375 | param { 376 | lr_mult: 0 377 | decay_mult: 0 378 | } 379 | param { 380 | lr_mult: 0 381 | decay_mult: 0 382 | } 383 | convolution_param { 384 | num_output: 64 385 | kernel_size: 1 386 | weight_filler { 387 | type: "xavier" 388 | } 389 | bias_filler { 390 | type: "constant" 391 | value: 0.2 392 | } 393 | } 394 | } 395 | layer { 396 | name: "inception_1b/relu_3x3_reduce" 397 | type: "ReLU" 398 | bottom: "inception_1b/3x3_reduce" 399 | top: "inception_1b/3x3_reduce" 400 | } 401 | layer { 402 | name: "inception_1b/3x3" 403 | type: "Convolution" 404 | bottom: "inception_1b/3x3_reduce" 405 | top: "inception_1b/3x3" 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | convolution_param { 415 | num_output: 64 416 | pad: 1 417 | kernel_size: 3 418 | stride: 2 419 | weight_filler { 420 | type: "xavier" 421 | } 422 | bias_filler { 423 | type: "constant" 424 | value: 0.2 425 | } 426 | } 427 | } 428 | layer { 429 | name: "inception_1b/relu_3x3" 430 | type: "ReLU" 431 | bottom: "inception_1b/3x3" 432 | top: "inception_1b/3x3" 433 | } 434 | layer { 435 | name: "inception_1b/double_3x3_reduce" 436 | type: "Convolution" 437 | bottom: "inception_1a/output" 438 | top: "inception_1b/double_3x3_reduce" 439 | param { 440 | lr_mult: 0 441 | decay_mult: 0 442 | } 443 | param { 444 | lr_mult: 0 445 | decay_mult: 0 446 | } 447 | convolution_param { 448 | num_output: 64 449 | kernel_size: 1 450 | weight_filler { 451 | type: "xavier" 452 | } 453 | bias_filler { 454 | type: "constant" 455 | value: 0.2 456 | } 457 | } 458 | } 459 | layer { 460 | name: "inception_1b/relu_double_3x3_reduce" 461 | type: "ReLU" 462 | bottom: "inception_1b/double_3x3_reduce" 463 | top: "inception_1b/double_3x3_reduce" 464 | } 465 | layer { 466 | name: "inception_1b/double_3x3_1" 467 | type: "Convolution" 468 | bottom: "inception_1b/double_3x3_reduce" 469 | top: "inception_1b/double_3x3_1" 470 | param { 471 | lr_mult: 0 472 | decay_mult: 0 473 | } 474 | param { 475 | lr_mult: 0 476 | decay_mult: 0 477 | } 478 | convolution_param { 479 | num_output: 64 480 | pad: 1 481 | kernel_size: 3 482 | weight_filler { 483 | type: "xavier" 484 | } 485 | bias_filler { 486 | type: "constant" 487 | value: 0.2 488 | } 489 | } 490 | } 491 | layer { 492 | name: "inception_1b/relu_double_3x3_1" 493 | type: "ReLU" 494 | bottom: "inception_1b/double_3x3_1" 495 | top: "inception_1b/double_3x3_1" 496 | } 497 | layer { 498 | name: "inception_1b/double_3x3_2" 499 | type: "Convolution" 500 | bottom: "inception_1b/double_3x3_1" 501 | top: "inception_1b/double_3x3_2" 502 | param { 503 | lr_mult: 0 504 | decay_mult: 0 505 | } 506 | param { 507 | lr_mult: 0 508 | decay_mult: 0 509 | } 510 | convolution_param { 511 | num_output: 64 512 | pad: 1 513 | kernel_size: 3 514 | stride: 2 515 | weight_filler { 516 | type: "xavier" 517 | } 518 | bias_filler { 519 | type: "constant" 520 | value: 0.2 521 | } 522 | } 523 | } 524 | layer { 525 | name: "inception_1b/relu_double_3x3_2" 526 | type: "ReLU" 527 | bottom: "inception_1b/double_3x3_2" 528 | top: "inception_1b/double_3x3_2" 529 | } 530 | layer { 531 | name: "inception_1b/pool" 532 | type: "Pooling" 533 | bottom: "inception_1a/output" 534 | top: "inception_1b/pool" 535 | pooling_param { 536 | pool: MAX 537 | kernel_size: 3 538 | stride: 2 539 | } 540 | } 541 | layer { 542 | name: "inception_1b/output" 543 | type: "Concat" 544 | bottom: "inception_1b/3x3" 545 | bottom: "inception_1b/double_3x3_2" 
546 | bottom: "inception_1b/pool" 547 | top: "inception_1b/output" 548 | } 549 | layer { 550 | name: "inception_2a/1x1" 551 | type: "Convolution" 552 | bottom: "inception_1b/output" 553 | top: "inception_2a/1x1" 554 | param { 555 | lr_mult: 0 556 | decay_mult: 0 557 | } 558 | param { 559 | lr_mult: 0 560 | decay_mult: 0 561 | } 562 | convolution_param { 563 | num_output: 128 564 | kernel_size: 1 565 | weight_filler { 566 | type: "xavier" 567 | } 568 | bias_filler { 569 | type: "constant" 570 | value: 0.2 571 | } 572 | } 573 | } 574 | layer { 575 | name: "inception_2a/relu_1x1" 576 | type: "ReLU" 577 | bottom: "inception_2a/1x1" 578 | top: "inception_2a/1x1" 579 | } 580 | layer { 581 | name: "inception_2a/3x3_reduce" 582 | type: "Convolution" 583 | bottom: "inception_1b/output" 584 | top: "inception_2a/3x3_reduce" 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | convolution_param { 594 | num_output: 128 595 | kernel_size: 1 596 | weight_filler { 597 | type: "xavier" 598 | } 599 | bias_filler { 600 | type: "constant" 601 | value: 0.2 602 | } 603 | } 604 | } 605 | layer { 606 | name: "inception_2a/relu_3x3_reduce" 607 | type: "ReLU" 608 | bottom: "inception_2a/3x3_reduce" 609 | top: "inception_2a/3x3_reduce" 610 | } 611 | layer { 612 | name: "inception_2a/3x3" 613 | type: "Convolution" 614 | bottom: "inception_2a/3x3_reduce" 615 | top: "inception_2a/3x3" 616 | param { 617 | lr_mult: 0 618 | decay_mult: 0 619 | } 620 | param { 621 | lr_mult: 0 622 | decay_mult: 0 623 | } 624 | convolution_param { 625 | num_output: 128 626 | pad: 1 627 | kernel_size: 3 628 | weight_filler { 629 | type: "xavier" 630 | } 631 | bias_filler { 632 | type: "constant" 633 | value: 0.2 634 | } 635 | } 636 | } 637 | layer { 638 | name: "inception_2a/relu_3x3" 639 | type: "ReLU" 640 | bottom: "inception_2a/3x3" 641 | top: "inception_2a/3x3" 642 | } 643 | layer { 644 | name: "inception_2a/double_3x3_reduce" 645 | type: "Convolution" 646 | bottom: "inception_1b/output" 647 | top: "inception_2a/double_3x3_reduce" 648 | param { 649 | lr_mult: 0 650 | decay_mult: 0 651 | } 652 | param { 653 | lr_mult: 0 654 | decay_mult: 0 655 | } 656 | convolution_param { 657 | num_output: 128 658 | kernel_size: 1 659 | weight_filler { 660 | type: "xavier" 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0.2 665 | } 666 | } 667 | } 668 | layer { 669 | name: "inception_2a/relu_double_3x3_reduce" 670 | type: "ReLU" 671 | bottom: "inception_2a/double_3x3_reduce" 672 | top: "inception_2a/double_3x3_reduce" 673 | } 674 | layer { 675 | name: "inception_2a/double_3x3_1" 676 | type: "Convolution" 677 | bottom: "inception_2a/double_3x3_reduce" 678 | top: "inception_2a/double_3x3_1" 679 | param { 680 | lr_mult: 0 681 | decay_mult: 0 682 | } 683 | param { 684 | lr_mult: 0 685 | decay_mult: 0 686 | } 687 | convolution_param { 688 | num_output: 128 689 | pad: 1 690 | kernel_size: 3 691 | weight_filler { 692 | type: "xavier" 693 | } 694 | bias_filler { 695 | type: "constant" 696 | value: 0.2 697 | } 698 | } 699 | } 700 | layer { 701 | name: "inception_2a/relu_double_3x3_1" 702 | type: "ReLU" 703 | bottom: "inception_2a/double_3x3_1" 704 | top: "inception_2a/double_3x3_1" 705 | } 706 | layer { 707 | name: "inception_2a/double_3x3_2" 708 | type: "Convolution" 709 | bottom: "inception_2a/double_3x3_1" 710 | top: "inception_2a/double_3x3_2" 711 | param { 712 | lr_mult: 0 713 | decay_mult: 0 714 | } 715 | param { 716 | lr_mult: 0 717 | decay_mult: 0 718 | } 719 | 
convolution_param { 720 | num_output: 128 721 | pad: 1 722 | kernel_size: 3 723 | weight_filler { 724 | type: "xavier" 725 | } 726 | bias_filler { 727 | type: "constant" 728 | value: 0.2 729 | } 730 | } 731 | } 732 | layer { 733 | name: "inception_2a/relu_double_3x3_2" 734 | type: "ReLU" 735 | bottom: "inception_2a/double_3x3_2" 736 | top: "inception_2a/double_3x3_2" 737 | } 738 | layer { 739 | name: "inception_2a/pool" 740 | type: "Pooling" 741 | bottom: "inception_1b/output" 742 | top: "inception_2a/pool" 743 | pooling_param { 744 | pool: AVE 745 | kernel_size: 3 746 | stride: 1 747 | pad: 1 748 | } 749 | } 750 | layer { 751 | name: "inception_2a/pool_proj" 752 | type: "Convolution" 753 | bottom: "inception_2a/pool" 754 | top: "inception_2a/pool_proj" 755 | param { 756 | lr_mult: 0 757 | decay_mult: 0 758 | } 759 | param { 760 | lr_mult: 0 761 | decay_mult: 0 762 | } 763 | convolution_param { 764 | num_output: 128 765 | kernel_size: 1 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0.2 772 | } 773 | } 774 | } 775 | layer { 776 | name: "inception_2a/relu_pool_proj" 777 | type: "ReLU" 778 | bottom: "inception_2a/pool_proj" 779 | top: "inception_2a/pool_proj" 780 | } 781 | layer { 782 | name: "inception_2a/output" 783 | type: "Concat" 784 | bottom: "inception_2a/1x1" 785 | bottom: "inception_2a/3x3" 786 | bottom: "inception_2a/double_3x3_2" 787 | bottom: "inception_2a/pool_proj" 788 | top: "inception_2a/output" 789 | } 790 | layer { 791 | name: "inception_2b/3x3_reduce" 792 | type: "Convolution" 793 | bottom: "inception_2a/output" 794 | top: "inception_2b/3x3_reduce" 795 | param { 796 | lr_mult: 0 797 | decay_mult: 0 798 | } 799 | param { 800 | lr_mult: 0 801 | decay_mult: 0 802 | } 803 | convolution_param { 804 | num_output: 128 805 | kernel_size: 1 806 | weight_filler { 807 | type: "xavier" 808 | } 809 | bias_filler { 810 | type: "constant" 811 | value: 0.2 812 | } 813 | } 814 | } 815 | layer { 816 | name: "inception_2b/relu_3x3_reduce" 817 | type: "ReLU" 818 | bottom: "inception_2b/3x3_reduce" 819 | top: "inception_2b/3x3_reduce" 820 | } 821 | layer { 822 | name: "inception_2b/3x3" 823 | type: "Convolution" 824 | bottom: "inception_2b/3x3_reduce" 825 | top: "inception_2b/3x3" 826 | param { 827 | lr_mult: 0 828 | decay_mult: 0 829 | } 830 | param { 831 | lr_mult: 0 832 | decay_mult: 0 833 | } 834 | convolution_param { 835 | num_output: 128 836 | pad: 1 837 | kernel_size: 3 838 | stride: 2 839 | weight_filler { 840 | type: "xavier" 841 | } 842 | bias_filler { 843 | type: "constant" 844 | value: 0.2 845 | } 846 | } 847 | } 848 | layer { 849 | name: "inception_2b/relu_3x3" 850 | type: "ReLU" 851 | bottom: "inception_2b/3x3" 852 | top: "inception_2b/3x3" 853 | } 854 | layer { 855 | name: "inception_2b/double_3x3_reduce" 856 | type: "Convolution" 857 | bottom: "inception_2a/output" 858 | top: "inception_2b/double_3x3_reduce" 859 | param { 860 | lr_mult: 0 861 | decay_mult: 0 862 | } 863 | param { 864 | lr_mult: 0 865 | decay_mult: 0 866 | } 867 | convolution_param { 868 | num_output: 128 869 | kernel_size: 1 870 | weight_filler { 871 | type: "xavier" 872 | } 873 | bias_filler { 874 | type: "constant" 875 | value: 0.2 876 | } 877 | } 878 | } 879 | layer { 880 | name: "inception_2b/relu_double_3x3_reduce" 881 | type: "ReLU" 882 | bottom: "inception_2b/double_3x3_reduce" 883 | top: "inception_2b/double_3x3_reduce" 884 | } 885 | layer { 886 | name: "inception_2b/double_3x3_1" 887 | type: "Convolution" 888 | bottom: 
"inception_2b/double_3x3_reduce" 889 | top: "inception_2b/double_3x3_1" 890 | param { 891 | lr_mult: 0 892 | decay_mult: 0 893 | } 894 | param { 895 | lr_mult: 0 896 | decay_mult: 0 897 | } 898 | convolution_param { 899 | num_output: 128 900 | pad: 1 901 | kernel_size: 3 902 | weight_filler { 903 | type: "xavier" 904 | } 905 | bias_filler { 906 | type: "constant" 907 | value: 0.2 908 | } 909 | } 910 | } 911 | layer { 912 | name: "inception_2b/relu_double_3x3_1" 913 | type: "ReLU" 914 | bottom: "inception_2b/double_3x3_1" 915 | top: "inception_2b/double_3x3_1" 916 | } 917 | layer { 918 | name: "inception_2b/double_3x3_2" 919 | type: "Convolution" 920 | bottom: "inception_2b/double_3x3_1" 921 | top: "inception_2b/double_3x3_2" 922 | param { 923 | lr_mult: 0 924 | decay_mult: 0 925 | } 926 | param { 927 | lr_mult: 0 928 | decay_mult: 0 929 | } 930 | convolution_param { 931 | num_output: 128 932 | pad: 1 933 | kernel_size: 3 934 | stride: 2 935 | weight_filler { 936 | type: "xavier" 937 | } 938 | bias_filler { 939 | type: "constant" 940 | value: 0.2 941 | } 942 | } 943 | } 944 | layer { 945 | name: "inception_2b/relu_double_3x3_2" 946 | type: "ReLU" 947 | bottom: "inception_2b/double_3x3_2" 948 | top: "inception_2b/double_3x3_2" 949 | } 950 | layer { 951 | name: "inception_2b/pool" 952 | type: "Pooling" 953 | bottom: "inception_2a/output" 954 | top: "inception_2b/pool" 955 | pooling_param { 956 | pool: MAX 957 | kernel_size: 3 958 | stride: 2 959 | } 960 | } 961 | layer { 962 | name: "inception_2b/output" 963 | type: "Concat" 964 | bottom: "inception_2b/3x3" 965 | bottom: "inception_2b/double_3x3_2" 966 | bottom: "inception_2b/pool" 967 | top: "inception_2b/output" 968 | } 969 | layer { 970 | name: "inception_3a/1x1" 971 | type: "Convolution" 972 | bottom: "inception_2b/output" 973 | top: "inception_3a/1x1" 974 | param { 975 | lr_mult: 0 976 | decay_mult: 0 977 | } 978 | param { 979 | lr_mult: 0 980 | decay_mult: 0 981 | } 982 | convolution_param { 983 | num_output: 256 984 | kernel_size: 1 985 | weight_filler { 986 | type: "xavier" 987 | } 988 | bias_filler { 989 | type: "constant" 990 | value: 0.2 991 | } 992 | } 993 | } 994 | layer { 995 | name: "inception_3a/relu_1x1" 996 | type: "ReLU" 997 | bottom: "inception_3a/1x1" 998 | top: "inception_3a/1x1" 999 | } 1000 | layer { 1001 | name: "inception_3a/3x3_reduce" 1002 | type: "Convolution" 1003 | bottom: "inception_2b/output" 1004 | top: "inception_3a/3x3_reduce" 1005 | param { 1006 | lr_mult: 0 1007 | decay_mult: 0 1008 | } 1009 | param { 1010 | lr_mult: 0 1011 | decay_mult: 0 1012 | } 1013 | convolution_param { 1014 | num_output: 256 1015 | kernel_size: 1 1016 | weight_filler { 1017 | type: "xavier" 1018 | } 1019 | bias_filler { 1020 | type: "constant" 1021 | value: 0.2 1022 | } 1023 | } 1024 | } 1025 | layer { 1026 | name: "inception_3a/relu_3x3_reduce" 1027 | type: "ReLU" 1028 | bottom: "inception_3a/3x3_reduce" 1029 | top: "inception_3a/3x3_reduce" 1030 | } 1031 | layer { 1032 | name: "inception_3a/3x3" 1033 | type: "Convolution" 1034 | bottom: "inception_3a/3x3_reduce" 1035 | top: "inception_3a/3x3" 1036 | param { 1037 | lr_mult: 0 1038 | decay_mult: 0 1039 | } 1040 | param { 1041 | lr_mult: 0 1042 | decay_mult: 0 1043 | } 1044 | convolution_param { 1045 | num_output: 256 1046 | pad: 1 1047 | kernel_size: 3 1048 | weight_filler { 1049 | type: "xavier" 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | value: 0.2 1054 | } 1055 | } 1056 | } 1057 | layer { 1058 | name: "inception_3a/relu_3x3" 1059 | type: "ReLU" 1060 | bottom: 
"inception_3a/3x3" 1061 | top: "inception_3a/3x3" 1062 | } 1063 | layer { 1064 | name: "inception_3a/double_3x3_reduce" 1065 | type: "Convolution" 1066 | bottom: "inception_2b/output" 1067 | top: "inception_3a/double_3x3_reduce" 1068 | param { 1069 | lr_mult: 0 1070 | decay_mult: 0 1071 | } 1072 | param { 1073 | lr_mult: 0 1074 | decay_mult: 0 1075 | } 1076 | convolution_param { 1077 | num_output: 256 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "xavier" 1081 | } 1082 | bias_filler { 1083 | type: "constant" 1084 | value: 0.2 1085 | } 1086 | } 1087 | } 1088 | layer { 1089 | name: "inception_3a/relu_double_3x3_reduce" 1090 | type: "ReLU" 1091 | bottom: "inception_3a/double_3x3_reduce" 1092 | top: "inception_3a/double_3x3_reduce" 1093 | } 1094 | layer { 1095 | name: "inception_3a/double_3x3_1" 1096 | type: "Convolution" 1097 | bottom: "inception_3a/double_3x3_reduce" 1098 | top: "inception_3a/double_3x3_1" 1099 | param { 1100 | lr_mult: 0 1101 | decay_mult: 0 1102 | } 1103 | param { 1104 | lr_mult: 0 1105 | decay_mult: 0 1106 | } 1107 | convolution_param { 1108 | num_output: 256 1109 | pad: 1 1110 | kernel_size: 3 1111 | weight_filler { 1112 | type: "xavier" 1113 | } 1114 | bias_filler { 1115 | type: "constant" 1116 | value: 0.2 1117 | } 1118 | } 1119 | } 1120 | layer { 1121 | name: "inception_3a/relu_double_3x3_1" 1122 | type: "ReLU" 1123 | bottom: "inception_3a/double_3x3_1" 1124 | top: "inception_3a/double_3x3_1" 1125 | } 1126 | layer { 1127 | name: "inception_3a/double_3x3_2" 1128 | type: "Convolution" 1129 | bottom: "inception_3a/double_3x3_1" 1130 | top: "inception_3a/double_3x3_2" 1131 | param { 1132 | lr_mult: 0 1133 | decay_mult: 0 1134 | } 1135 | param { 1136 | lr_mult: 0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 256 1141 | pad: 1 1142 | kernel_size: 3 1143 | weight_filler { 1144 | type: "xavier" 1145 | } 1146 | bias_filler { 1147 | type: "constant" 1148 | value: 0.2 1149 | } 1150 | } 1151 | } 1152 | layer { 1153 | name: "inception_3a/relu_double_3x3_2" 1154 | type: "ReLU" 1155 | bottom: "inception_3a/double_3x3_2" 1156 | top: "inception_3a/double_3x3_2" 1157 | } 1158 | layer { 1159 | name: "inception_3a/pool" 1160 | type: "Pooling" 1161 | bottom: "inception_2b/output" 1162 | top: "inception_3a/pool" 1163 | pooling_param { 1164 | pool: AVE 1165 | kernel_size: 3 1166 | stride: 1 1167 | pad: 1 1168 | } 1169 | } 1170 | layer { 1171 | name: "inception_3a/pool_proj" 1172 | type: "Convolution" 1173 | bottom: "inception_3a/pool" 1174 | top: "inception_3a/pool_proj" 1175 | param { 1176 | lr_mult: 0 1177 | decay_mult: 0 1178 | } 1179 | param { 1180 | lr_mult: 0 1181 | decay_mult: 0 1182 | } 1183 | convolution_param { 1184 | num_output: 256 1185 | kernel_size: 1 1186 | weight_filler { 1187 | type: "xavier" 1188 | } 1189 | bias_filler { 1190 | type: "constant" 1191 | value: 0.2 1192 | } 1193 | } 1194 | } 1195 | layer { 1196 | name: "inception_3a/relu_pool_proj" 1197 | type: "ReLU" 1198 | bottom: "inception_3a/pool_proj" 1199 | top: "inception_3a/pool_proj" 1200 | } 1201 | layer { 1202 | name: "inception_3a/output" 1203 | type: "Concat" 1204 | bottom: "inception_3a/1x1" 1205 | bottom: "inception_3a/3x3" 1206 | bottom: "inception_3a/double_3x3_2" 1207 | bottom: "inception_3a/pool_proj" 1208 | top: "inception_3a/output" 1209 | } 1210 | layer { 1211 | name: "inception_3b/3x3_reduce" 1212 | type: "Convolution" 1213 | bottom: "inception_3a/output" 1214 | top: "inception_3b/3x3_reduce" 1215 | param { 1216 | lr_mult: 0 1217 | decay_mult: 0 1218 | } 1219 | 
param { 1220 | lr_mult: 0 1221 | decay_mult: 0 1222 | } 1223 | convolution_param { 1224 | num_output: 256 1225 | kernel_size: 1 1226 | weight_filler { 1227 | type: "xavier" 1228 | } 1229 | bias_filler { 1230 | type: "constant" 1231 | value: 0.2 1232 | } 1233 | } 1234 | } 1235 | layer { 1236 | name: "inception_3b/relu_3x3_reduce" 1237 | type: "ReLU" 1238 | bottom: "inception_3b/3x3_reduce" 1239 | top: "inception_3b/3x3_reduce" 1240 | } 1241 | layer { 1242 | name: "inception_3b/3x3" 1243 | type: "Convolution" 1244 | bottom: "inception_3b/3x3_reduce" 1245 | top: "inception_3b/3x3" 1246 | param { 1247 | lr_mult: 0 1248 | decay_mult: 0 1249 | } 1250 | param { 1251 | lr_mult: 0 1252 | decay_mult: 0 1253 | } 1254 | convolution_param { 1255 | num_output: 256 1256 | pad: 1 1257 | kernel_size: 3 1258 | stride: 2 1259 | weight_filler { 1260 | type: "xavier" 1261 | } 1262 | bias_filler { 1263 | type: "constant" 1264 | value: 0.2 1265 | } 1266 | } 1267 | } 1268 | layer { 1269 | name: "inception_3b/relu_3x3" 1270 | type: "ReLU" 1271 | bottom: "inception_3b/3x3" 1272 | top: "inception_3b/3x3" 1273 | } 1274 | layer { 1275 | name: "inception_3b/double_3x3_reduce" 1276 | type: "Convolution" 1277 | bottom: "inception_3a/output" 1278 | top: "inception_3b/double_3x3_reduce" 1279 | param { 1280 | lr_mult: 0 1281 | decay_mult: 0 1282 | } 1283 | param { 1284 | lr_mult: 0 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 256 1289 | kernel_size: 1 1290 | weight_filler { 1291 | type: "xavier" 1292 | } 1293 | bias_filler { 1294 | type: "constant" 1295 | value: 0.2 1296 | } 1297 | } 1298 | } 1299 | layer { 1300 | name: "inception_3b/relu_double_3x3_reduce" 1301 | type: "ReLU" 1302 | bottom: "inception_3b/double_3x3_reduce" 1303 | top: "inception_3b/double_3x3_reduce" 1304 | } 1305 | layer { 1306 | name: "inception_3b/double_3x3_1" 1307 | type: "Convolution" 1308 | bottom: "inception_3b/double_3x3_reduce" 1309 | top: "inception_3b/double_3x3_1" 1310 | param { 1311 | lr_mult: 0 1312 | decay_mult: 0 1313 | } 1314 | param { 1315 | lr_mult: 0 1316 | decay_mult: 0 1317 | } 1318 | convolution_param { 1319 | num_output: 256 1320 | pad: 1 1321 | kernel_size: 3 1322 | weight_filler { 1323 | type: "xavier" 1324 | } 1325 | bias_filler { 1326 | type: "constant" 1327 | value: 0.2 1328 | } 1329 | } 1330 | } 1331 | layer { 1332 | name: "inception_3b/relu_double_3x3_1" 1333 | type: "ReLU" 1334 | bottom: "inception_3b/double_3x3_1" 1335 | top: "inception_3b/double_3x3_1" 1336 | } 1337 | layer { 1338 | name: "inception_3b/double_3x3_2" 1339 | type: "Convolution" 1340 | bottom: "inception_3b/double_3x3_1" 1341 | top: "inception_3b/double_3x3_2" 1342 | param { 1343 | lr_mult: 0 1344 | decay_mult: 0 1345 | } 1346 | param { 1347 | lr_mult: 0 1348 | decay_mult: 0 1349 | } 1350 | convolution_param { 1351 | num_output: 256 1352 | pad: 1 1353 | kernel_size: 3 1354 | stride: 2 1355 | weight_filler { 1356 | type: "xavier" 1357 | } 1358 | bias_filler { 1359 | type: "constant" 1360 | value: 0.2 1361 | } 1362 | } 1363 | } 1364 | layer { 1365 | name: "inception_3b/relu_double_3x3_2" 1366 | type: "ReLU" 1367 | bottom: "inception_3b/double_3x3_2" 1368 | top: "inception_3b/double_3x3_2" 1369 | } 1370 | layer { 1371 | name: "inception_3b/pool" 1372 | type: "Pooling" 1373 | bottom: "inception_3a/output" 1374 | top: "inception_3b/pool" 1375 | pooling_param { 1376 | pool: MAX 1377 | kernel_size: 3 1378 | stride: 2 1379 | pad_h: 0 1380 | pad_w: 1 1381 | } 1382 | } 1383 | layer { 1384 | name: "inception_3b/output" 1385 | type: 
"Concat" 1386 | bottom: "inception_3b/3x3" 1387 | bottom: "inception_3b/double_3x3_2" 1388 | bottom: "inception_3b/pool" 1389 | top: "inception_3b/output" 1390 | } 1391 | layer { 1392 | name: "global_pool" 1393 | type: "Pooling" 1394 | bottom: "inception_3b/output" 1395 | top: "global_pool" 1396 | pooling_param { 1397 | pool: AVE 1398 | stride: 1 1399 | kernel_h: 9 1400 | kernel_w: 4 1401 | } 1402 | } 1403 | layer { 1404 | name: "fc7" 1405 | type: "InnerProduct" 1406 | bottom: "global_pool" 1407 | top: "fc7" 1408 | param { 1409 | lr_mult: 0 1410 | decay_mult: 0 1411 | } 1412 | param { 1413 | lr_mult: 0 1414 | decay_mult: 0 1415 | } 1416 | inner_product_param { 1417 | num_output: 256 1418 | weight_filler { 1419 | type: "xavier" 1420 | } 1421 | bias_filler { 1422 | type: "constant" 1423 | value: 0.0 1424 | } 1425 | } 1426 | } 1427 | layer { 1428 | name: "relu7" 1429 | type: "ReLU" 1430 | bottom: "fc7" 1431 | top: "fc7" 1432 | } 1433 | layer { 1434 | name: "drop7" 1435 | type: "Dropout" 1436 | bottom: "fc7" 1437 | top: "fc7" 1438 | dropout_param { 1439 | dropout_ratio: 0.5 1440 | } 1441 | } 1442 | 1443 | ####################################### loss accuracy ####################################### 1444 | layer { name: "fc8_3dpes" type: "InnerProduct" bottom: "fc7" top: "fc8_3dpes" 1445 | param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } 1446 | inner_product_param { num_output: 193 1447 | weight_filler { type: "gaussian" std: 0.001 } 1448 | bias_filler { type: "constant" value: 0 } } } 1449 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_3dpes" bottom: "label" top: "loss" } 1450 | layer { name: "accuracy" type: "Accuracy" bottom: "fc8_3dpes" bottom: "label" top: "accuracy" 1451 | include { phase: TEST } } -------------------------------------------------------------------------------- /models/fc_only/ilids_trainval.prototxt: -------------------------------------------------------------------------------- 1 | name: "iLIDS" 2 | richness: 1000 3 | 4 | ##################################### data ##################################### 5 | layer { name: "data" type: "Data" top: "data" top: "label" 6 | data_param { backend: LMDB batch_size: 100 shuffle_pool_size: 10 7 | source: "external/exp/db/ilids/train_lmdb" } 8 | transform_param { mirror: true crop_height: 144 crop_width: 56 9 | mean_value: 102 mean_value: 102 mean_value: 101 } 10 | include: { phase: TRAIN } } 11 | layer { name: "data" type: "Data" top: "data" top: "label" 12 | data_param { backend: LMDB batch_size: 20 13 | source: "external/exp/db/ilids/val_lmdb" } 14 | transform_param { mirror: false crop_height: 144 crop_width: 56 15 | mean_value: 102 mean_value: 102 mean_value: 101 } 16 | include: { phase: TEST } } 17 | 18 | ################################ fixed layers ################################## 19 | layer { 20 | name: "conv1" 21 | type: "Convolution" 22 | bottom: "data" 23 | top: "conv1" 24 | param { 25 | lr_mult: 0 26 | decay_mult: 0 27 | } 28 | param { 29 | lr_mult: 0 30 | decay_mult: 0 31 | } 32 | convolution_param { 33 | num_output: 32 34 | pad: 1 35 | kernel_size: 3 36 | stride: 1 37 | weight_filler { 38 | type: "xavier" 39 | } 40 | bias_filler { 41 | type: "constant" 42 | value: 0.2 43 | } 44 | } 45 | } 46 | layer { 47 | name: "relu1" 48 | type: "ReLU" 49 | bottom: "conv1" 50 | top: "conv1" 51 | } 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "conv1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 0 59 | decay_mult: 0 60 | } 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | 
} 65 | convolution_param { 66 | num_output: 32 67 | pad: 1 68 | kernel_size: 3 69 | stride: 1 70 | weight_filler { 71 | type: "xavier" 72 | } 73 | bias_filler { 74 | type: "constant" 75 | value: 0.2 76 | } 77 | } 78 | } 79 | layer { 80 | name: "relu2" 81 | type: "ReLU" 82 | bottom: "conv2" 83 | top: "conv2" 84 | } 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 0 92 | decay_mult: 0 93 | } 94 | param { 95 | lr_mult: 0 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | pad: 1 101 | kernel_size: 3 102 | stride: 1 103 | weight_filler { 104 | type: "xavier" 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 0.2 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu3" 114 | type: "ReLU" 115 | bottom: "conv3" 116 | top: "conv3" 117 | } 118 | layer { 119 | name: "pool1" 120 | type: "Pooling" 121 | bottom: "conv3" 122 | top: "pool1" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 2 126 | stride: 2 127 | } 128 | } 129 | layer { 130 | name: "inception_1a/1x1" 131 | type: "Convolution" 132 | bottom: "pool1" 133 | top: "inception_1a/1x1" 134 | param { 135 | lr_mult: 0 136 | decay_mult: 0 137 | } 138 | param { 139 | lr_mult: 0 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 64 144 | kernel_size: 1 145 | weight_filler { 146 | type: "xavier" 147 | } 148 | bias_filler { 149 | type: "constant" 150 | value: 0.2 151 | } 152 | } 153 | } 154 | layer { 155 | name: "inception_1a/relu_1x1" 156 | type: "ReLU" 157 | bottom: "inception_1a/1x1" 158 | top: "inception_1a/1x1" 159 | } 160 | layer { 161 | name: "inception_1a/3x3_reduce" 162 | type: "Convolution" 163 | bottom: "pool1" 164 | top: "inception_1a/3x3_reduce" 165 | param { 166 | lr_mult: 0 167 | decay_mult: 0 168 | } 169 | param { 170 | lr_mult: 0 171 | decay_mult: 0 172 | } 173 | convolution_param { 174 | num_output: 64 175 | kernel_size: 1 176 | weight_filler { 177 | type: "xavier" 178 | } 179 | bias_filler { 180 | type: "constant" 181 | value: 0.2 182 | } 183 | } 184 | } 185 | layer { 186 | name: "inception_1a/relu_3x3_reduce" 187 | type: "ReLU" 188 | bottom: "inception_1a/3x3_reduce" 189 | top: "inception_1a/3x3_reduce" 190 | } 191 | layer { 192 | name: "inception_1a/3x3" 193 | type: "Convolution" 194 | bottom: "inception_1a/3x3_reduce" 195 | top: "inception_1a/3x3" 196 | param { 197 | lr_mult: 0 198 | decay_mult: 0 199 | } 200 | param { 201 | lr_mult: 0 202 | decay_mult: 0 203 | } 204 | convolution_param { 205 | num_output: 64 206 | pad: 1 207 | kernel_size: 3 208 | weight_filler { 209 | type: "xavier" 210 | } 211 | bias_filler { 212 | type: "constant" 213 | value: 0.2 214 | } 215 | } 216 | } 217 | layer { 218 | name: "inception_1a/relu_3x3" 219 | type: "ReLU" 220 | bottom: "inception_1a/3x3" 221 | top: "inception_1a/3x3" 222 | } 223 | layer { 224 | name: "inception_1a/double_3x3_reduce" 225 | type: "Convolution" 226 | bottom: "pool1" 227 | top: "inception_1a/double_3x3_reduce" 228 | param { 229 | lr_mult: 0 230 | decay_mult: 0 231 | } 232 | param { 233 | lr_mult: 0 234 | decay_mult: 0 235 | } 236 | convolution_param { 237 | num_output: 64 238 | kernel_size: 1 239 | weight_filler { 240 | type: "xavier" 241 | } 242 | bias_filler { 243 | type: "constant" 244 | value: 0.2 245 | } 246 | } 247 | } 248 | layer { 249 | name: "inception_1a/relu_double_3x3_reduce" 250 | type: "ReLU" 251 | bottom: "inception_1a/double_3x3_reduce" 252 | top: "inception_1a/double_3x3_reduce" 253 | } 254 | layer { 255 | name: 
"inception_1a/double_3x3_1" 256 | type: "Convolution" 257 | bottom: "inception_1a/double_3x3_reduce" 258 | top: "inception_1a/double_3x3_1" 259 | param { 260 | lr_mult: 0 261 | decay_mult: 0 262 | } 263 | param { 264 | lr_mult: 0 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 64 269 | pad: 1 270 | kernel_size: 3 271 | weight_filler { 272 | type: "xavier" 273 | } 274 | bias_filler { 275 | type: "constant" 276 | value: 0.2 277 | } 278 | } 279 | } 280 | layer { 281 | name: "inception_1a/relu_double_3x3_1" 282 | type: "ReLU" 283 | bottom: "inception_1a/double_3x3_1" 284 | top: "inception_1a/double_3x3_1" 285 | } 286 | layer { 287 | name: "inception_1a/double_3x3_2" 288 | type: "Convolution" 289 | bottom: "inception_1a/double_3x3_1" 290 | top: "inception_1a/double_3x3_2" 291 | param { 292 | lr_mult: 0 293 | decay_mult: 0 294 | } 295 | param { 296 | lr_mult: 0 297 | decay_mult: 0 298 | } 299 | convolution_param { 300 | num_output: 64 301 | pad: 1 302 | kernel_size: 3 303 | weight_filler { 304 | type: "xavier" 305 | } 306 | bias_filler { 307 | type: "constant" 308 | value: 0.2 309 | } 310 | } 311 | } 312 | layer { 313 | name: "inception_1a/relu_double_3x3_2" 314 | type: "ReLU" 315 | bottom: "inception_1a/double_3x3_2" 316 | top: "inception_1a/double_3x3_2" 317 | } 318 | layer { 319 | name: "inception_1a/pool" 320 | type: "Pooling" 321 | bottom: "pool1" 322 | top: "inception_1a/pool" 323 | pooling_param { 324 | pool: AVE 325 | kernel_size: 3 326 | stride: 1 327 | pad: 1 328 | } 329 | } 330 | layer { 331 | name: "inception_1a/pool_proj" 332 | type: "Convolution" 333 | bottom: "inception_1a/pool" 334 | top: "inception_1a/pool_proj" 335 | param { 336 | lr_mult: 0 337 | decay_mult: 0 338 | } 339 | param { 340 | lr_mult: 0 341 | decay_mult: 0 342 | } 343 | convolution_param { 344 | num_output: 64 345 | kernel_size: 1 346 | weight_filler { 347 | type: "xavier" 348 | } 349 | bias_filler { 350 | type: "constant" 351 | value: 0.2 352 | } 353 | } 354 | } 355 | layer { 356 | name: "inception_1a/relu_pool_proj" 357 | type: "ReLU" 358 | bottom: "inception_1a/pool_proj" 359 | top: "inception_1a/pool_proj" 360 | } 361 | layer { 362 | name: "inception_1a/output" 363 | type: "Concat" 364 | bottom: "inception_1a/1x1" 365 | bottom: "inception_1a/3x3" 366 | bottom: "inception_1a/double_3x3_2" 367 | bottom: "inception_1a/pool_proj" 368 | top: "inception_1a/output" 369 | } 370 | layer { 371 | name: "inception_1b/3x3_reduce" 372 | type: "Convolution" 373 | bottom: "inception_1a/output" 374 | top: "inception_1b/3x3_reduce" 375 | param { 376 | lr_mult: 0 377 | decay_mult: 0 378 | } 379 | param { 380 | lr_mult: 0 381 | decay_mult: 0 382 | } 383 | convolution_param { 384 | num_output: 64 385 | kernel_size: 1 386 | weight_filler { 387 | type: "xavier" 388 | } 389 | bias_filler { 390 | type: "constant" 391 | value: 0.2 392 | } 393 | } 394 | } 395 | layer { 396 | name: "inception_1b/relu_3x3_reduce" 397 | type: "ReLU" 398 | bottom: "inception_1b/3x3_reduce" 399 | top: "inception_1b/3x3_reduce" 400 | } 401 | layer { 402 | name: "inception_1b/3x3" 403 | type: "Convolution" 404 | bottom: "inception_1b/3x3_reduce" 405 | top: "inception_1b/3x3" 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | convolution_param { 415 | num_output: 64 416 | pad: 1 417 | kernel_size: 3 418 | stride: 2 419 | weight_filler { 420 | type: "xavier" 421 | } 422 | bias_filler { 423 | type: "constant" 424 | value: 0.2 425 | } 426 | } 427 | } 428 | layer { 429 | 
name: "inception_1b/relu_3x3" 430 | type: "ReLU" 431 | bottom: "inception_1b/3x3" 432 | top: "inception_1b/3x3" 433 | } 434 | layer { 435 | name: "inception_1b/double_3x3_reduce" 436 | type: "Convolution" 437 | bottom: "inception_1a/output" 438 | top: "inception_1b/double_3x3_reduce" 439 | param { 440 | lr_mult: 0 441 | decay_mult: 0 442 | } 443 | param { 444 | lr_mult: 0 445 | decay_mult: 0 446 | } 447 | convolution_param { 448 | num_output: 64 449 | kernel_size: 1 450 | weight_filler { 451 | type: "xavier" 452 | } 453 | bias_filler { 454 | type: "constant" 455 | value: 0.2 456 | } 457 | } 458 | } 459 | layer { 460 | name: "inception_1b/relu_double_3x3_reduce" 461 | type: "ReLU" 462 | bottom: "inception_1b/double_3x3_reduce" 463 | top: "inception_1b/double_3x3_reduce" 464 | } 465 | layer { 466 | name: "inception_1b/double_3x3_1" 467 | type: "Convolution" 468 | bottom: "inception_1b/double_3x3_reduce" 469 | top: "inception_1b/double_3x3_1" 470 | param { 471 | lr_mult: 0 472 | decay_mult: 0 473 | } 474 | param { 475 | lr_mult: 0 476 | decay_mult: 0 477 | } 478 | convolution_param { 479 | num_output: 64 480 | pad: 1 481 | kernel_size: 3 482 | weight_filler { 483 | type: "xavier" 484 | } 485 | bias_filler { 486 | type: "constant" 487 | value: 0.2 488 | } 489 | } 490 | } 491 | layer { 492 | name: "inception_1b/relu_double_3x3_1" 493 | type: "ReLU" 494 | bottom: "inception_1b/double_3x3_1" 495 | top: "inception_1b/double_3x3_1" 496 | } 497 | layer { 498 | name: "inception_1b/double_3x3_2" 499 | type: "Convolution" 500 | bottom: "inception_1b/double_3x3_1" 501 | top: "inception_1b/double_3x3_2" 502 | param { 503 | lr_mult: 0 504 | decay_mult: 0 505 | } 506 | param { 507 | lr_mult: 0 508 | decay_mult: 0 509 | } 510 | convolution_param { 511 | num_output: 64 512 | pad: 1 513 | kernel_size: 3 514 | stride: 2 515 | weight_filler { 516 | type: "xavier" 517 | } 518 | bias_filler { 519 | type: "constant" 520 | value: 0.2 521 | } 522 | } 523 | } 524 | layer { 525 | name: "inception_1b/relu_double_3x3_2" 526 | type: "ReLU" 527 | bottom: "inception_1b/double_3x3_2" 528 | top: "inception_1b/double_3x3_2" 529 | } 530 | layer { 531 | name: "inception_1b/pool" 532 | type: "Pooling" 533 | bottom: "inception_1a/output" 534 | top: "inception_1b/pool" 535 | pooling_param { 536 | pool: MAX 537 | kernel_size: 3 538 | stride: 2 539 | } 540 | } 541 | layer { 542 | name: "inception_1b/output" 543 | type: "Concat" 544 | bottom: "inception_1b/3x3" 545 | bottom: "inception_1b/double_3x3_2" 546 | bottom: "inception_1b/pool" 547 | top: "inception_1b/output" 548 | } 549 | layer { 550 | name: "inception_2a/1x1" 551 | type: "Convolution" 552 | bottom: "inception_1b/output" 553 | top: "inception_2a/1x1" 554 | param { 555 | lr_mult: 0 556 | decay_mult: 0 557 | } 558 | param { 559 | lr_mult: 0 560 | decay_mult: 0 561 | } 562 | convolution_param { 563 | num_output: 128 564 | kernel_size: 1 565 | weight_filler { 566 | type: "xavier" 567 | } 568 | bias_filler { 569 | type: "constant" 570 | value: 0.2 571 | } 572 | } 573 | } 574 | layer { 575 | name: "inception_2a/relu_1x1" 576 | type: "ReLU" 577 | bottom: "inception_2a/1x1" 578 | top: "inception_2a/1x1" 579 | } 580 | layer { 581 | name: "inception_2a/3x3_reduce" 582 | type: "Convolution" 583 | bottom: "inception_1b/output" 584 | top: "inception_2a/3x3_reduce" 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | convolution_param { 594 | num_output: 128 595 | kernel_size: 1 596 | weight_filler { 597 | type: 
"xavier" 598 | } 599 | bias_filler { 600 | type: "constant" 601 | value: 0.2 602 | } 603 | } 604 | } 605 | layer { 606 | name: "inception_2a/relu_3x3_reduce" 607 | type: "ReLU" 608 | bottom: "inception_2a/3x3_reduce" 609 | top: "inception_2a/3x3_reduce" 610 | } 611 | layer { 612 | name: "inception_2a/3x3" 613 | type: "Convolution" 614 | bottom: "inception_2a/3x3_reduce" 615 | top: "inception_2a/3x3" 616 | param { 617 | lr_mult: 0 618 | decay_mult: 0 619 | } 620 | param { 621 | lr_mult: 0 622 | decay_mult: 0 623 | } 624 | convolution_param { 625 | num_output: 128 626 | pad: 1 627 | kernel_size: 3 628 | weight_filler { 629 | type: "xavier" 630 | } 631 | bias_filler { 632 | type: "constant" 633 | value: 0.2 634 | } 635 | } 636 | } 637 | layer { 638 | name: "inception_2a/relu_3x3" 639 | type: "ReLU" 640 | bottom: "inception_2a/3x3" 641 | top: "inception_2a/3x3" 642 | } 643 | layer { 644 | name: "inception_2a/double_3x3_reduce" 645 | type: "Convolution" 646 | bottom: "inception_1b/output" 647 | top: "inception_2a/double_3x3_reduce" 648 | param { 649 | lr_mult: 0 650 | decay_mult: 0 651 | } 652 | param { 653 | lr_mult: 0 654 | decay_mult: 0 655 | } 656 | convolution_param { 657 | num_output: 128 658 | kernel_size: 1 659 | weight_filler { 660 | type: "xavier" 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0.2 665 | } 666 | } 667 | } 668 | layer { 669 | name: "inception_2a/relu_double_3x3_reduce" 670 | type: "ReLU" 671 | bottom: "inception_2a/double_3x3_reduce" 672 | top: "inception_2a/double_3x3_reduce" 673 | } 674 | layer { 675 | name: "inception_2a/double_3x3_1" 676 | type: "Convolution" 677 | bottom: "inception_2a/double_3x3_reduce" 678 | top: "inception_2a/double_3x3_1" 679 | param { 680 | lr_mult: 0 681 | decay_mult: 0 682 | } 683 | param { 684 | lr_mult: 0 685 | decay_mult: 0 686 | } 687 | convolution_param { 688 | num_output: 128 689 | pad: 1 690 | kernel_size: 3 691 | weight_filler { 692 | type: "xavier" 693 | } 694 | bias_filler { 695 | type: "constant" 696 | value: 0.2 697 | } 698 | } 699 | } 700 | layer { 701 | name: "inception_2a/relu_double_3x3_1" 702 | type: "ReLU" 703 | bottom: "inception_2a/double_3x3_1" 704 | top: "inception_2a/double_3x3_1" 705 | } 706 | layer { 707 | name: "inception_2a/double_3x3_2" 708 | type: "Convolution" 709 | bottom: "inception_2a/double_3x3_1" 710 | top: "inception_2a/double_3x3_2" 711 | param { 712 | lr_mult: 0 713 | decay_mult: 0 714 | } 715 | param { 716 | lr_mult: 0 717 | decay_mult: 0 718 | } 719 | convolution_param { 720 | num_output: 128 721 | pad: 1 722 | kernel_size: 3 723 | weight_filler { 724 | type: "xavier" 725 | } 726 | bias_filler { 727 | type: "constant" 728 | value: 0.2 729 | } 730 | } 731 | } 732 | layer { 733 | name: "inception_2a/relu_double_3x3_2" 734 | type: "ReLU" 735 | bottom: "inception_2a/double_3x3_2" 736 | top: "inception_2a/double_3x3_2" 737 | } 738 | layer { 739 | name: "inception_2a/pool" 740 | type: "Pooling" 741 | bottom: "inception_1b/output" 742 | top: "inception_2a/pool" 743 | pooling_param { 744 | pool: AVE 745 | kernel_size: 3 746 | stride: 1 747 | pad: 1 748 | } 749 | } 750 | layer { 751 | name: "inception_2a/pool_proj" 752 | type: "Convolution" 753 | bottom: "inception_2a/pool" 754 | top: "inception_2a/pool_proj" 755 | param { 756 | lr_mult: 0 757 | decay_mult: 0 758 | } 759 | param { 760 | lr_mult: 0 761 | decay_mult: 0 762 | } 763 | convolution_param { 764 | num_output: 128 765 | kernel_size: 1 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | 
value: 0.2 772 | } 773 | } 774 | } 775 | layer { 776 | name: "inception_2a/relu_pool_proj" 777 | type: "ReLU" 778 | bottom: "inception_2a/pool_proj" 779 | top: "inception_2a/pool_proj" 780 | } 781 | layer { 782 | name: "inception_2a/output" 783 | type: "Concat" 784 | bottom: "inception_2a/1x1" 785 | bottom: "inception_2a/3x3" 786 | bottom: "inception_2a/double_3x3_2" 787 | bottom: "inception_2a/pool_proj" 788 | top: "inception_2a/output" 789 | } 790 | layer { 791 | name: "inception_2b/3x3_reduce" 792 | type: "Convolution" 793 | bottom: "inception_2a/output" 794 | top: "inception_2b/3x3_reduce" 795 | param { 796 | lr_mult: 0 797 | decay_mult: 0 798 | } 799 | param { 800 | lr_mult: 0 801 | decay_mult: 0 802 | } 803 | convolution_param { 804 | num_output: 128 805 | kernel_size: 1 806 | weight_filler { 807 | type: "xavier" 808 | } 809 | bias_filler { 810 | type: "constant" 811 | value: 0.2 812 | } 813 | } 814 | } 815 | layer { 816 | name: "inception_2b/relu_3x3_reduce" 817 | type: "ReLU" 818 | bottom: "inception_2b/3x3_reduce" 819 | top: "inception_2b/3x3_reduce" 820 | } 821 | layer { 822 | name: "inception_2b/3x3" 823 | type: "Convolution" 824 | bottom: "inception_2b/3x3_reduce" 825 | top: "inception_2b/3x3" 826 | param { 827 | lr_mult: 0 828 | decay_mult: 0 829 | } 830 | param { 831 | lr_mult: 0 832 | decay_mult: 0 833 | } 834 | convolution_param { 835 | num_output: 128 836 | pad: 1 837 | kernel_size: 3 838 | stride: 2 839 | weight_filler { 840 | type: "xavier" 841 | } 842 | bias_filler { 843 | type: "constant" 844 | value: 0.2 845 | } 846 | } 847 | } 848 | layer { 849 | name: "inception_2b/relu_3x3" 850 | type: "ReLU" 851 | bottom: "inception_2b/3x3" 852 | top: "inception_2b/3x3" 853 | } 854 | layer { 855 | name: "inception_2b/double_3x3_reduce" 856 | type: "Convolution" 857 | bottom: "inception_2a/output" 858 | top: "inception_2b/double_3x3_reduce" 859 | param { 860 | lr_mult: 0 861 | decay_mult: 0 862 | } 863 | param { 864 | lr_mult: 0 865 | decay_mult: 0 866 | } 867 | convolution_param { 868 | num_output: 128 869 | kernel_size: 1 870 | weight_filler { 871 | type: "xavier" 872 | } 873 | bias_filler { 874 | type: "constant" 875 | value: 0.2 876 | } 877 | } 878 | } 879 | layer { 880 | name: "inception_2b/relu_double_3x3_reduce" 881 | type: "ReLU" 882 | bottom: "inception_2b/double_3x3_reduce" 883 | top: "inception_2b/double_3x3_reduce" 884 | } 885 | layer { 886 | name: "inception_2b/double_3x3_1" 887 | type: "Convolution" 888 | bottom: "inception_2b/double_3x3_reduce" 889 | top: "inception_2b/double_3x3_1" 890 | param { 891 | lr_mult: 0 892 | decay_mult: 0 893 | } 894 | param { 895 | lr_mult: 0 896 | decay_mult: 0 897 | } 898 | convolution_param { 899 | num_output: 128 900 | pad: 1 901 | kernel_size: 3 902 | weight_filler { 903 | type: "xavier" 904 | } 905 | bias_filler { 906 | type: "constant" 907 | value: 0.2 908 | } 909 | } 910 | } 911 | layer { 912 | name: "inception_2b/relu_double_3x3_1" 913 | type: "ReLU" 914 | bottom: "inception_2b/double_3x3_1" 915 | top: "inception_2b/double_3x3_1" 916 | } 917 | layer { 918 | name: "inception_2b/double_3x3_2" 919 | type: "Convolution" 920 | bottom: "inception_2b/double_3x3_1" 921 | top: "inception_2b/double_3x3_2" 922 | param { 923 | lr_mult: 0 924 | decay_mult: 0 925 | } 926 | param { 927 | lr_mult: 0 928 | decay_mult: 0 929 | } 930 | convolution_param { 931 | num_output: 128 932 | pad: 1 933 | kernel_size: 3 934 | stride: 2 935 | weight_filler { 936 | type: "xavier" 937 | } 938 | bias_filler { 939 | type: "constant" 940 | value: 0.2 941 | } 942 | 
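# NOTE: the xavier/constant fillers are carried over from the training
# template; since lr_mult is 0 here, these layers are expected to be
# initialized from a pretrained snapshot, and the fillers only take effect
# if no weights file is supplied.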
} 943 | } 944 | layer { 945 | name: "inception_2b/relu_double_3x3_2" 946 | type: "ReLU" 947 | bottom: "inception_2b/double_3x3_2" 948 | top: "inception_2b/double_3x3_2" 949 | } 950 | layer { 951 | name: "inception_2b/pool" 952 | type: "Pooling" 953 | bottom: "inception_2a/output" 954 | top: "inception_2b/pool" 955 | pooling_param { 956 | pool: MAX 957 | kernel_size: 3 958 | stride: 2 959 | } 960 | } 961 | layer { 962 | name: "inception_2b/output" 963 | type: "Concat" 964 | bottom: "inception_2b/3x3" 965 | bottom: "inception_2b/double_3x3_2" 966 | bottom: "inception_2b/pool" 967 | top: "inception_2b/output" 968 | } 969 | layer { 970 | name: "inception_3a/1x1" 971 | type: "Convolution" 972 | bottom: "inception_2b/output" 973 | top: "inception_3a/1x1" 974 | param { 975 | lr_mult: 0 976 | decay_mult: 0 977 | } 978 | param { 979 | lr_mult: 0 980 | decay_mult: 0 981 | } 982 | convolution_param { 983 | num_output: 256 984 | kernel_size: 1 985 | weight_filler { 986 | type: "xavier" 987 | } 988 | bias_filler { 989 | type: "constant" 990 | value: 0.2 991 | } 992 | } 993 | } 994 | layer { 995 | name: "inception_3a/relu_1x1" 996 | type: "ReLU" 997 | bottom: "inception_3a/1x1" 998 | top: "inception_3a/1x1" 999 | } 1000 | layer { 1001 | name: "inception_3a/3x3_reduce" 1002 | type: "Convolution" 1003 | bottom: "inception_2b/output" 1004 | top: "inception_3a/3x3_reduce" 1005 | param { 1006 | lr_mult: 0 1007 | decay_mult: 0 1008 | } 1009 | param { 1010 | lr_mult: 0 1011 | decay_mult: 0 1012 | } 1013 | convolution_param { 1014 | num_output: 256 1015 | kernel_size: 1 1016 | weight_filler { 1017 | type: "xavier" 1018 | } 1019 | bias_filler { 1020 | type: "constant" 1021 | value: 0.2 1022 | } 1023 | } 1024 | } 1025 | layer { 1026 | name: "inception_3a/relu_3x3_reduce" 1027 | type: "ReLU" 1028 | bottom: "inception_3a/3x3_reduce" 1029 | top: "inception_3a/3x3_reduce" 1030 | } 1031 | layer { 1032 | name: "inception_3a/3x3" 1033 | type: "Convolution" 1034 | bottom: "inception_3a/3x3_reduce" 1035 | top: "inception_3a/3x3" 1036 | param { 1037 | lr_mult: 0 1038 | decay_mult: 0 1039 | } 1040 | param { 1041 | lr_mult: 0 1042 | decay_mult: 0 1043 | } 1044 | convolution_param { 1045 | num_output: 256 1046 | pad: 1 1047 | kernel_size: 3 1048 | weight_filler { 1049 | type: "xavier" 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | value: 0.2 1054 | } 1055 | } 1056 | } 1057 | layer { 1058 | name: "inception_3a/relu_3x3" 1059 | type: "ReLU" 1060 | bottom: "inception_3a/3x3" 1061 | top: "inception_3a/3x3" 1062 | } 1063 | layer { 1064 | name: "inception_3a/double_3x3_reduce" 1065 | type: "Convolution" 1066 | bottom: "inception_2b/output" 1067 | top: "inception_3a/double_3x3_reduce" 1068 | param { 1069 | lr_mult: 0 1070 | decay_mult: 0 1071 | } 1072 | param { 1073 | lr_mult: 0 1074 | decay_mult: 0 1075 | } 1076 | convolution_param { 1077 | num_output: 256 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "xavier" 1081 | } 1082 | bias_filler { 1083 | type: "constant" 1084 | value: 0.2 1085 | } 1086 | } 1087 | } 1088 | layer { 1089 | name: "inception_3a/relu_double_3x3_reduce" 1090 | type: "ReLU" 1091 | bottom: "inception_3a/double_3x3_reduce" 1092 | top: "inception_3a/double_3x3_reduce" 1093 | } 1094 | layer { 1095 | name: "inception_3a/double_3x3_1" 1096 | type: "Convolution" 1097 | bottom: "inception_3a/double_3x3_reduce" 1098 | top: "inception_3a/double_3x3_1" 1099 | param { 1100 | lr_mult: 0 1101 | decay_mult: 0 1102 | } 1103 | param { 1104 | lr_mult: 0 1105 | decay_mult: 0 1106 | } 1107 | 
convolution_param { 1108 | num_output: 256 1109 | pad: 1 1110 | kernel_size: 3 1111 | weight_filler { 1112 | type: "xavier" 1113 | } 1114 | bias_filler { 1115 | type: "constant" 1116 | value: 0.2 1117 | } 1118 | } 1119 | } 1120 | layer { 1121 | name: "inception_3a/relu_double_3x3_1" 1122 | type: "ReLU" 1123 | bottom: "inception_3a/double_3x3_1" 1124 | top: "inception_3a/double_3x3_1" 1125 | } 1126 | layer { 1127 | name: "inception_3a/double_3x3_2" 1128 | type: "Convolution" 1129 | bottom: "inception_3a/double_3x3_1" 1130 | top: "inception_3a/double_3x3_2" 1131 | param { 1132 | lr_mult: 0 1133 | decay_mult: 0 1134 | } 1135 | param { 1136 | lr_mult: 0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 256 1141 | pad: 1 1142 | kernel_size: 3 1143 | weight_filler { 1144 | type: "xavier" 1145 | } 1146 | bias_filler { 1147 | type: "constant" 1148 | value: 0.2 1149 | } 1150 | } 1151 | } 1152 | layer { 1153 | name: "inception_3a/relu_double_3x3_2" 1154 | type: "ReLU" 1155 | bottom: "inception_3a/double_3x3_2" 1156 | top: "inception_3a/double_3x3_2" 1157 | } 1158 | layer { 1159 | name: "inception_3a/pool" 1160 | type: "Pooling" 1161 | bottom: "inception_2b/output" 1162 | top: "inception_3a/pool" 1163 | pooling_param { 1164 | pool: AVE 1165 | kernel_size: 3 1166 | stride: 1 1167 | pad: 1 1168 | } 1169 | } 1170 | layer { 1171 | name: "inception_3a/pool_proj" 1172 | type: "Convolution" 1173 | bottom: "inception_3a/pool" 1174 | top: "inception_3a/pool_proj" 1175 | param { 1176 | lr_mult: 0 1177 | decay_mult: 0 1178 | } 1179 | param { 1180 | lr_mult: 0 1181 | decay_mult: 0 1182 | } 1183 | convolution_param { 1184 | num_output: 256 1185 | kernel_size: 1 1186 | weight_filler { 1187 | type: "xavier" 1188 | } 1189 | bias_filler { 1190 | type: "constant" 1191 | value: 0.2 1192 | } 1193 | } 1194 | } 1195 | layer { 1196 | name: "inception_3a/relu_pool_proj" 1197 | type: "ReLU" 1198 | bottom: "inception_3a/pool_proj" 1199 | top: "inception_3a/pool_proj" 1200 | } 1201 | layer { 1202 | name: "inception_3a/output" 1203 | type: "Concat" 1204 | bottom: "inception_3a/1x1" 1205 | bottom: "inception_3a/3x3" 1206 | bottom: "inception_3a/double_3x3_2" 1207 | bottom: "inception_3a/pool_proj" 1208 | top: "inception_3a/output" 1209 | } 1210 | layer { 1211 | name: "inception_3b/3x3_reduce" 1212 | type: "Convolution" 1213 | bottom: "inception_3a/output" 1214 | top: "inception_3b/3x3_reduce" 1215 | param { 1216 | lr_mult: 0 1217 | decay_mult: 0 1218 | } 1219 | param { 1220 | lr_mult: 0 1221 | decay_mult: 0 1222 | } 1223 | convolution_param { 1224 | num_output: 256 1225 | kernel_size: 1 1226 | weight_filler { 1227 | type: "xavier" 1228 | } 1229 | bias_filler { 1230 | type: "constant" 1231 | value: 0.2 1232 | } 1233 | } 1234 | } 1235 | layer { 1236 | name: "inception_3b/relu_3x3_reduce" 1237 | type: "ReLU" 1238 | bottom: "inception_3b/3x3_reduce" 1239 | top: "inception_3b/3x3_reduce" 1240 | } 1241 | layer { 1242 | name: "inception_3b/3x3" 1243 | type: "Convolution" 1244 | bottom: "inception_3b/3x3_reduce" 1245 | top: "inception_3b/3x3" 1246 | param { 1247 | lr_mult: 0 1248 | decay_mult: 0 1249 | } 1250 | param { 1251 | lr_mult: 0 1252 | decay_mult: 0 1253 | } 1254 | convolution_param { 1255 | num_output: 256 1256 | pad: 1 1257 | kernel_size: 3 1258 | stride: 2 1259 | weight_filler { 1260 | type: "xavier" 1261 | } 1262 | bias_filler { 1263 | type: "constant" 1264 | value: 0.2 1265 | } 1266 | } 1267 | } 1268 | layer { 1269 | name: "inception_3b/relu_3x3" 1270 | type: "ReLU" 1271 | bottom: 
"inception_3b/3x3" 1272 | top: "inception_3b/3x3" 1273 | } 1274 | layer { 1275 | name: "inception_3b/double_3x3_reduce" 1276 | type: "Convolution" 1277 | bottom: "inception_3a/output" 1278 | top: "inception_3b/double_3x3_reduce" 1279 | param { 1280 | lr_mult: 0 1281 | decay_mult: 0 1282 | } 1283 | param { 1284 | lr_mult: 0 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 256 1289 | kernel_size: 1 1290 | weight_filler { 1291 | type: "xavier" 1292 | } 1293 | bias_filler { 1294 | type: "constant" 1295 | value: 0.2 1296 | } 1297 | } 1298 | } 1299 | layer { 1300 | name: "inception_3b/relu_double_3x3_reduce" 1301 | type: "ReLU" 1302 | bottom: "inception_3b/double_3x3_reduce" 1303 | top: "inception_3b/double_3x3_reduce" 1304 | } 1305 | layer { 1306 | name: "inception_3b/double_3x3_1" 1307 | type: "Convolution" 1308 | bottom: "inception_3b/double_3x3_reduce" 1309 | top: "inception_3b/double_3x3_1" 1310 | param { 1311 | lr_mult: 0 1312 | decay_mult: 0 1313 | } 1314 | param { 1315 | lr_mult: 0 1316 | decay_mult: 0 1317 | } 1318 | convolution_param { 1319 | num_output: 256 1320 | pad: 1 1321 | kernel_size: 3 1322 | weight_filler { 1323 | type: "xavier" 1324 | } 1325 | bias_filler { 1326 | type: "constant" 1327 | value: 0.2 1328 | } 1329 | } 1330 | } 1331 | layer { 1332 | name: "inception_3b/relu_double_3x3_1" 1333 | type: "ReLU" 1334 | bottom: "inception_3b/double_3x3_1" 1335 | top: "inception_3b/double_3x3_1" 1336 | } 1337 | layer { 1338 | name: "inception_3b/double_3x3_2" 1339 | type: "Convolution" 1340 | bottom: "inception_3b/double_3x3_1" 1341 | top: "inception_3b/double_3x3_2" 1342 | param { 1343 | lr_mult: 0 1344 | decay_mult: 0 1345 | } 1346 | param { 1347 | lr_mult: 0 1348 | decay_mult: 0 1349 | } 1350 | convolution_param { 1351 | num_output: 256 1352 | pad: 1 1353 | kernel_size: 3 1354 | stride: 2 1355 | weight_filler { 1356 | type: "xavier" 1357 | } 1358 | bias_filler { 1359 | type: "constant" 1360 | value: 0.2 1361 | } 1362 | } 1363 | } 1364 | layer { 1365 | name: "inception_3b/relu_double_3x3_2" 1366 | type: "ReLU" 1367 | bottom: "inception_3b/double_3x3_2" 1368 | top: "inception_3b/double_3x3_2" 1369 | } 1370 | layer { 1371 | name: "inception_3b/pool" 1372 | type: "Pooling" 1373 | bottom: "inception_3a/output" 1374 | top: "inception_3b/pool" 1375 | pooling_param { 1376 | pool: MAX 1377 | kernel_size: 3 1378 | stride: 2 1379 | pad_h: 0 1380 | pad_w: 1 1381 | } 1382 | } 1383 | layer { 1384 | name: "inception_3b/output" 1385 | type: "Concat" 1386 | bottom: "inception_3b/3x3" 1387 | bottom: "inception_3b/double_3x3_2" 1388 | bottom: "inception_3b/pool" 1389 | top: "inception_3b/output" 1390 | } 1391 | layer { 1392 | name: "global_pool" 1393 | type: "Pooling" 1394 | bottom: "inception_3b/output" 1395 | top: "global_pool" 1396 | pooling_param { 1397 | pool: AVE 1398 | stride: 1 1399 | kernel_h: 9 1400 | kernel_w: 4 1401 | } 1402 | } 1403 | layer { 1404 | name: "fc7" 1405 | type: "InnerProduct" 1406 | bottom: "global_pool" 1407 | top: "fc7" 1408 | param { 1409 | lr_mult: 0 1410 | decay_mult: 0 1411 | } 1412 | param { 1413 | lr_mult: 0 1414 | decay_mult: 0 1415 | } 1416 | inner_product_param { 1417 | num_output: 256 1418 | weight_filler { 1419 | type: "xavier" 1420 | } 1421 | bias_filler { 1422 | type: "constant" 1423 | value: 0.0 1424 | } 1425 | } 1426 | } 1427 | layer { 1428 | name: "relu7" 1429 | type: "ReLU" 1430 | bottom: "fc7" 1431 | top: "fc7" 1432 | } 1433 | layer { 1434 | name: "drop7" 1435 | type: "Dropout" 1436 | bottom: "fc7" 1437 | top: "fc7" 1438 | 
dropout_param { 1439 | dropout_ratio: 0.5 1440 | } 1441 | } 1442 | 1443 | ####################################### loss accuracy ####################################### 1444 | layer { name: "fc8_ilids" type: "InnerProduct" bottom: "fc7" top: "fc8_ilids" 1445 | param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } 1446 | inner_product_param { num_output: 119 1447 | weight_filler { type: "gaussian" std: 0.001 } 1448 | bias_filler { type: "constant" value: 0 } } } 1449 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_ilids" bottom: "label" top: "loss" } 1450 | layer { name: "accuracy" type: "Accuracy" bottom: "fc8_ilids" bottom: "label" top: "accuracy" 1451 | include { phase: TEST } } -------------------------------------------------------------------------------- /models/fc_only/prid_trainval.prototxt: -------------------------------------------------------------------------------- 1 | name: "PRID" 2 | richness: 1000 3 | 4 | ##################################### data ##################################### 5 | layer { name: "data" type: "Data" top: "data" top: "label" 6 | data_param { backend: LMDB batch_size: 100 shuffle_pool_size: 10 7 | source: "external/exp/db/prid/train_lmdb" } 8 | transform_param { mirror: true crop_height: 144 crop_width: 56 9 | mean_value: 102 mean_value: 102 mean_value: 101 } 10 | include: { phase: TRAIN } } 11 | layer { name: "data" type: "Data" top: "data" top: "label" 12 | data_param { backend: LMDB batch_size: 20 13 | source: "external/exp/db/prid/val_lmdb" } 14 | transform_param { mirror: false crop_height: 144 crop_width: 56 15 | mean_value: 102 mean_value: 102 mean_value: 101 } 16 | include: { phase: TEST } } 17 | 18 | ################################ fixed layers ################################## 19 | layer { 20 | name: "conv1" 21 | type: "Convolution" 22 | bottom: "data" 23 | top: "conv1" 24 | param { 25 | lr_mult: 0 26 | decay_mult: 0 27 | } 28 | param { 29 | lr_mult: 0 30 | decay_mult: 0 31 | } 32 | convolution_param { 33 | num_output: 32 34 | pad: 1 35 | kernel_size: 3 36 | stride: 1 37 | weight_filler { 38 | type: "xavier" 39 | } 40 | bias_filler { 41 | type: "constant" 42 | value: 0.2 43 | } 44 | } 45 | } 46 | layer { 47 | name: "relu1" 48 | type: "ReLU" 49 | bottom: "conv1" 50 | top: "conv1" 51 | } 52 | layer { 53 | name: "conv2" 54 | type: "Convolution" 55 | bottom: "conv1" 56 | top: "conv2" 57 | param { 58 | lr_mult: 0 59 | decay_mult: 0 60 | } 61 | param { 62 | lr_mult: 0 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 32 67 | pad: 1 68 | kernel_size: 3 69 | stride: 1 70 | weight_filler { 71 | type: "xavier" 72 | } 73 | bias_filler { 74 | type: "constant" 75 | value: 0.2 76 | } 77 | } 78 | } 79 | layer { 80 | name: "relu2" 81 | type: "ReLU" 82 | bottom: "conv2" 83 | top: "conv2" 84 | } 85 | layer { 86 | name: "conv3" 87 | type: "Convolution" 88 | bottom: "conv2" 89 | top: "conv3" 90 | param { 91 | lr_mult: 0 92 | decay_mult: 0 93 | } 94 | param { 95 | lr_mult: 0 96 | decay_mult: 0 97 | } 98 | convolution_param { 99 | num_output: 32 100 | pad: 1 101 | kernel_size: 3 102 | stride: 1 103 | weight_filler { 104 | type: "xavier" 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 0.2 109 | } 110 | } 111 | } 112 | layer { 113 | name: "relu3" 114 | type: "ReLU" 115 | bottom: "conv3" 116 | top: "conv3" 117 | } 118 | layer { 119 | name: "pool1" 120 | type: "Pooling" 121 | bottom: "conv3" 122 | top: "pool1" 123 | pooling_param { 124 | pool: MAX 125 | kernel_size: 2 126 | stride: 2 127 | } 128 | } 129 | 
layer { 130 | name: "inception_1a/1x1" 131 | type: "Convolution" 132 | bottom: "pool1" 133 | top: "inception_1a/1x1" 134 | param { 135 | lr_mult: 0 136 | decay_mult: 0 137 | } 138 | param { 139 | lr_mult: 0 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 64 144 | kernel_size: 1 145 | weight_filler { 146 | type: "xavier" 147 | } 148 | bias_filler { 149 | type: "constant" 150 | value: 0.2 151 | } 152 | } 153 | } 154 | layer { 155 | name: "inception_1a/relu_1x1" 156 | type: "ReLU" 157 | bottom: "inception_1a/1x1" 158 | top: "inception_1a/1x1" 159 | } 160 | layer { 161 | name: "inception_1a/3x3_reduce" 162 | type: "Convolution" 163 | bottom: "pool1" 164 | top: "inception_1a/3x3_reduce" 165 | param { 166 | lr_mult: 0 167 | decay_mult: 0 168 | } 169 | param { 170 | lr_mult: 0 171 | decay_mult: 0 172 | } 173 | convolution_param { 174 | num_output: 64 175 | kernel_size: 1 176 | weight_filler { 177 | type: "xavier" 178 | } 179 | bias_filler { 180 | type: "constant" 181 | value: 0.2 182 | } 183 | } 184 | } 185 | layer { 186 | name: "inception_1a/relu_3x3_reduce" 187 | type: "ReLU" 188 | bottom: "inception_1a/3x3_reduce" 189 | top: "inception_1a/3x3_reduce" 190 | } 191 | layer { 192 | name: "inception_1a/3x3" 193 | type: "Convolution" 194 | bottom: "inception_1a/3x3_reduce" 195 | top: "inception_1a/3x3" 196 | param { 197 | lr_mult: 0 198 | decay_mult: 0 199 | } 200 | param { 201 | lr_mult: 0 202 | decay_mult: 0 203 | } 204 | convolution_param { 205 | num_output: 64 206 | pad: 1 207 | kernel_size: 3 208 | weight_filler { 209 | type: "xavier" 210 | } 211 | bias_filler { 212 | type: "constant" 213 | value: 0.2 214 | } 215 | } 216 | } 217 | layer { 218 | name: "inception_1a/relu_3x3" 219 | type: "ReLU" 220 | bottom: "inception_1a/3x3" 221 | top: "inception_1a/3x3" 222 | } 223 | layer { 224 | name: "inception_1a/double_3x3_reduce" 225 | type: "Convolution" 226 | bottom: "pool1" 227 | top: "inception_1a/double_3x3_reduce" 228 | param { 229 | lr_mult: 0 230 | decay_mult: 0 231 | } 232 | param { 233 | lr_mult: 0 234 | decay_mult: 0 235 | } 236 | convolution_param { 237 | num_output: 64 238 | kernel_size: 1 239 | weight_filler { 240 | type: "xavier" 241 | } 242 | bias_filler { 243 | type: "constant" 244 | value: 0.2 245 | } 246 | } 247 | } 248 | layer { 249 | name: "inception_1a/relu_double_3x3_reduce" 250 | type: "ReLU" 251 | bottom: "inception_1a/double_3x3_reduce" 252 | top: "inception_1a/double_3x3_reduce" 253 | } 254 | layer { 255 | name: "inception_1a/double_3x3_1" 256 | type: "Convolution" 257 | bottom: "inception_1a/double_3x3_reduce" 258 | top: "inception_1a/double_3x3_1" 259 | param { 260 | lr_mult: 0 261 | decay_mult: 0 262 | } 263 | param { 264 | lr_mult: 0 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 64 269 | pad: 1 270 | kernel_size: 3 271 | weight_filler { 272 | type: "xavier" 273 | } 274 | bias_filler { 275 | type: "constant" 276 | value: 0.2 277 | } 278 | } 279 | } 280 | layer { 281 | name: "inception_1a/relu_double_3x3_1" 282 | type: "ReLU" 283 | bottom: "inception_1a/double_3x3_1" 284 | top: "inception_1a/double_3x3_1" 285 | } 286 | layer { 287 | name: "inception_1a/double_3x3_2" 288 | type: "Convolution" 289 | bottom: "inception_1a/double_3x3_1" 290 | top: "inception_1a/double_3x3_2" 291 | param { 292 | lr_mult: 0 293 | decay_mult: 0 294 | } 295 | param { 296 | lr_mult: 0 297 | decay_mult: 0 298 | } 299 | convolution_param { 300 | num_output: 64 301 | pad: 1 302 | kernel_size: 3 303 | weight_filler { 304 | type: "xavier" 305 | } 306 | 
bias_filler { 307 | type: "constant" 308 | value: 0.2 309 | } 310 | } 311 | } 312 | layer { 313 | name: "inception_1a/relu_double_3x3_2" 314 | type: "ReLU" 315 | bottom: "inception_1a/double_3x3_2" 316 | top: "inception_1a/double_3x3_2" 317 | } 318 | layer { 319 | name: "inception_1a/pool" 320 | type: "Pooling" 321 | bottom: "pool1" 322 | top: "inception_1a/pool" 323 | pooling_param { 324 | pool: AVE 325 | kernel_size: 3 326 | stride: 1 327 | pad: 1 328 | } 329 | } 330 | layer { 331 | name: "inception_1a/pool_proj" 332 | type: "Convolution" 333 | bottom: "inception_1a/pool" 334 | top: "inception_1a/pool_proj" 335 | param { 336 | lr_mult: 0 337 | decay_mult: 0 338 | } 339 | param { 340 | lr_mult: 0 341 | decay_mult: 0 342 | } 343 | convolution_param { 344 | num_output: 64 345 | kernel_size: 1 346 | weight_filler { 347 | type: "xavier" 348 | } 349 | bias_filler { 350 | type: "constant" 351 | value: 0.2 352 | } 353 | } 354 | } 355 | layer { 356 | name: "inception_1a/relu_pool_proj" 357 | type: "ReLU" 358 | bottom: "inception_1a/pool_proj" 359 | top: "inception_1a/pool_proj" 360 | } 361 | layer { 362 | name: "inception_1a/output" 363 | type: "Concat" 364 | bottom: "inception_1a/1x1" 365 | bottom: "inception_1a/3x3" 366 | bottom: "inception_1a/double_3x3_2" 367 | bottom: "inception_1a/pool_proj" 368 | top: "inception_1a/output" 369 | } 370 | layer { 371 | name: "inception_1b/3x3_reduce" 372 | type: "Convolution" 373 | bottom: "inception_1a/output" 374 | top: "inception_1b/3x3_reduce" 375 | param { 376 | lr_mult: 0 377 | decay_mult: 0 378 | } 379 | param { 380 | lr_mult: 0 381 | decay_mult: 0 382 | } 383 | convolution_param { 384 | num_output: 64 385 | kernel_size: 1 386 | weight_filler { 387 | type: "xavier" 388 | } 389 | bias_filler { 390 | type: "constant" 391 | value: 0.2 392 | } 393 | } 394 | } 395 | layer { 396 | name: "inception_1b/relu_3x3_reduce" 397 | type: "ReLU" 398 | bottom: "inception_1b/3x3_reduce" 399 | top: "inception_1b/3x3_reduce" 400 | } 401 | layer { 402 | name: "inception_1b/3x3" 403 | type: "Convolution" 404 | bottom: "inception_1b/3x3_reduce" 405 | top: "inception_1b/3x3" 406 | param { 407 | lr_mult: 0 408 | decay_mult: 0 409 | } 410 | param { 411 | lr_mult: 0 412 | decay_mult: 0 413 | } 414 | convolution_param { 415 | num_output: 64 416 | pad: 1 417 | kernel_size: 3 418 | stride: 2 419 | weight_filler { 420 | type: "xavier" 421 | } 422 | bias_filler { 423 | type: "constant" 424 | value: 0.2 425 | } 426 | } 427 | } 428 | layer { 429 | name: "inception_1b/relu_3x3" 430 | type: "ReLU" 431 | bottom: "inception_1b/3x3" 432 | top: "inception_1b/3x3" 433 | } 434 | layer { 435 | name: "inception_1b/double_3x3_reduce" 436 | type: "Convolution" 437 | bottom: "inception_1a/output" 438 | top: "inception_1b/double_3x3_reduce" 439 | param { 440 | lr_mult: 0 441 | decay_mult: 0 442 | } 443 | param { 444 | lr_mult: 0 445 | decay_mult: 0 446 | } 447 | convolution_param { 448 | num_output: 64 449 | kernel_size: 1 450 | weight_filler { 451 | type: "xavier" 452 | } 453 | bias_filler { 454 | type: "constant" 455 | value: 0.2 456 | } 457 | } 458 | } 459 | layer { 460 | name: "inception_1b/relu_double_3x3_reduce" 461 | type: "ReLU" 462 | bottom: "inception_1b/double_3x3_reduce" 463 | top: "inception_1b/double_3x3_reduce" 464 | } 465 | layer { 466 | name: "inception_1b/double_3x3_1" 467 | type: "Convolution" 468 | bottom: "inception_1b/double_3x3_reduce" 469 | top: "inception_1b/double_3x3_1" 470 | param { 471 | lr_mult: 0 472 | decay_mult: 0 473 | } 474 | param { 475 | lr_mult: 0 476 | 
decay_mult: 0 477 | } 478 | convolution_param { 479 | num_output: 64 480 | pad: 1 481 | kernel_size: 3 482 | weight_filler { 483 | type: "xavier" 484 | } 485 | bias_filler { 486 | type: "constant" 487 | value: 0.2 488 | } 489 | } 490 | } 491 | layer { 492 | name: "inception_1b/relu_double_3x3_1" 493 | type: "ReLU" 494 | bottom: "inception_1b/double_3x3_1" 495 | top: "inception_1b/double_3x3_1" 496 | } 497 | layer { 498 | name: "inception_1b/double_3x3_2" 499 | type: "Convolution" 500 | bottom: "inception_1b/double_3x3_1" 501 | top: "inception_1b/double_3x3_2" 502 | param { 503 | lr_mult: 0 504 | decay_mult: 0 505 | } 506 | param { 507 | lr_mult: 0 508 | decay_mult: 0 509 | } 510 | convolution_param { 511 | num_output: 64 512 | pad: 1 513 | kernel_size: 3 514 | stride: 2 515 | weight_filler { 516 | type: "xavier" 517 | } 518 | bias_filler { 519 | type: "constant" 520 | value: 0.2 521 | } 522 | } 523 | } 524 | layer { 525 | name: "inception_1b/relu_double_3x3_2" 526 | type: "ReLU" 527 | bottom: "inception_1b/double_3x3_2" 528 | top: "inception_1b/double_3x3_2" 529 | } 530 | layer { 531 | name: "inception_1b/pool" 532 | type: "Pooling" 533 | bottom: "inception_1a/output" 534 | top: "inception_1b/pool" 535 | pooling_param { 536 | pool: MAX 537 | kernel_size: 3 538 | stride: 2 539 | } 540 | } 541 | layer { 542 | name: "inception_1b/output" 543 | type: "Concat" 544 | bottom: "inception_1b/3x3" 545 | bottom: "inception_1b/double_3x3_2" 546 | bottom: "inception_1b/pool" 547 | top: "inception_1b/output" 548 | } 549 | layer { 550 | name: "inception_2a/1x1" 551 | type: "Convolution" 552 | bottom: "inception_1b/output" 553 | top: "inception_2a/1x1" 554 | param { 555 | lr_mult: 0 556 | decay_mult: 0 557 | } 558 | param { 559 | lr_mult: 0 560 | decay_mult: 0 561 | } 562 | convolution_param { 563 | num_output: 128 564 | kernel_size: 1 565 | weight_filler { 566 | type: "xavier" 567 | } 568 | bias_filler { 569 | type: "constant" 570 | value: 0.2 571 | } 572 | } 573 | } 574 | layer { 575 | name: "inception_2a/relu_1x1" 576 | type: "ReLU" 577 | bottom: "inception_2a/1x1" 578 | top: "inception_2a/1x1" 579 | } 580 | layer { 581 | name: "inception_2a/3x3_reduce" 582 | type: "Convolution" 583 | bottom: "inception_1b/output" 584 | top: "inception_2a/3x3_reduce" 585 | param { 586 | lr_mult: 0 587 | decay_mult: 0 588 | } 589 | param { 590 | lr_mult: 0 591 | decay_mult: 0 592 | } 593 | convolution_param { 594 | num_output: 128 595 | kernel_size: 1 596 | weight_filler { 597 | type: "xavier" 598 | } 599 | bias_filler { 600 | type: "constant" 601 | value: 0.2 602 | } 603 | } 604 | } 605 | layer { 606 | name: "inception_2a/relu_3x3_reduce" 607 | type: "ReLU" 608 | bottom: "inception_2a/3x3_reduce" 609 | top: "inception_2a/3x3_reduce" 610 | } 611 | layer { 612 | name: "inception_2a/3x3" 613 | type: "Convolution" 614 | bottom: "inception_2a/3x3_reduce" 615 | top: "inception_2a/3x3" 616 | param { 617 | lr_mult: 0 618 | decay_mult: 0 619 | } 620 | param { 621 | lr_mult: 0 622 | decay_mult: 0 623 | } 624 | convolution_param { 625 | num_output: 128 626 | pad: 1 627 | kernel_size: 3 628 | weight_filler { 629 | type: "xavier" 630 | } 631 | bias_filler { 632 | type: "constant" 633 | value: 0.2 634 | } 635 | } 636 | } 637 | layer { 638 | name: "inception_2a/relu_3x3" 639 | type: "ReLU" 640 | bottom: "inception_2a/3x3" 641 | top: "inception_2a/3x3" 642 | } 643 | layer { 644 | name: "inception_2a/double_3x3_reduce" 645 | type: "Convolution" 646 | bottom: "inception_1b/output" 647 | top: "inception_2a/double_3x3_reduce" 648 | param 
{ 649 | lr_mult: 0 650 | decay_mult: 0 651 | } 652 | param { 653 | lr_mult: 0 654 | decay_mult: 0 655 | } 656 | convolution_param { 657 | num_output: 128 658 | kernel_size: 1 659 | weight_filler { 660 | type: "xavier" 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0.2 665 | } 666 | } 667 | } 668 | layer { 669 | name: "inception_2a/relu_double_3x3_reduce" 670 | type: "ReLU" 671 | bottom: "inception_2a/double_3x3_reduce" 672 | top: "inception_2a/double_3x3_reduce" 673 | } 674 | layer { 675 | name: "inception_2a/double_3x3_1" 676 | type: "Convolution" 677 | bottom: "inception_2a/double_3x3_reduce" 678 | top: "inception_2a/double_3x3_1" 679 | param { 680 | lr_mult: 0 681 | decay_mult: 0 682 | } 683 | param { 684 | lr_mult: 0 685 | decay_mult: 0 686 | } 687 | convolution_param { 688 | num_output: 128 689 | pad: 1 690 | kernel_size: 3 691 | weight_filler { 692 | type: "xavier" 693 | } 694 | bias_filler { 695 | type: "constant" 696 | value: 0.2 697 | } 698 | } 699 | } 700 | layer { 701 | name: "inception_2a/relu_double_3x3_1" 702 | type: "ReLU" 703 | bottom: "inception_2a/double_3x3_1" 704 | top: "inception_2a/double_3x3_1" 705 | } 706 | layer { 707 | name: "inception_2a/double_3x3_2" 708 | type: "Convolution" 709 | bottom: "inception_2a/double_3x3_1" 710 | top: "inception_2a/double_3x3_2" 711 | param { 712 | lr_mult: 0 713 | decay_mult: 0 714 | } 715 | param { 716 | lr_mult: 0 717 | decay_mult: 0 718 | } 719 | convolution_param { 720 | num_output: 128 721 | pad: 1 722 | kernel_size: 3 723 | weight_filler { 724 | type: "xavier" 725 | } 726 | bias_filler { 727 | type: "constant" 728 | value: 0.2 729 | } 730 | } 731 | } 732 | layer { 733 | name: "inception_2a/relu_double_3x3_2" 734 | type: "ReLU" 735 | bottom: "inception_2a/double_3x3_2" 736 | top: "inception_2a/double_3x3_2" 737 | } 738 | layer { 739 | name: "inception_2a/pool" 740 | type: "Pooling" 741 | bottom: "inception_1b/output" 742 | top: "inception_2a/pool" 743 | pooling_param { 744 | pool: AVE 745 | kernel_size: 3 746 | stride: 1 747 | pad: 1 748 | } 749 | } 750 | layer { 751 | name: "inception_2a/pool_proj" 752 | type: "Convolution" 753 | bottom: "inception_2a/pool" 754 | top: "inception_2a/pool_proj" 755 | param { 756 | lr_mult: 0 757 | decay_mult: 0 758 | } 759 | param { 760 | lr_mult: 0 761 | decay_mult: 0 762 | } 763 | convolution_param { 764 | num_output: 128 765 | kernel_size: 1 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0.2 772 | } 773 | } 774 | } 775 | layer { 776 | name: "inception_2a/relu_pool_proj" 777 | type: "ReLU" 778 | bottom: "inception_2a/pool_proj" 779 | top: "inception_2a/pool_proj" 780 | } 781 | layer { 782 | name: "inception_2a/output" 783 | type: "Concat" 784 | bottom: "inception_2a/1x1" 785 | bottom: "inception_2a/3x3" 786 | bottom: "inception_2a/double_3x3_2" 787 | bottom: "inception_2a/pool_proj" 788 | top: "inception_2a/output" 789 | } 790 | layer { 791 | name: "inception_2b/3x3_reduce" 792 | type: "Convolution" 793 | bottom: "inception_2a/output" 794 | top: "inception_2b/3x3_reduce" 795 | param { 796 | lr_mult: 0 797 | decay_mult: 0 798 | } 799 | param { 800 | lr_mult: 0 801 | decay_mult: 0 802 | } 803 | convolution_param { 804 | num_output: 128 805 | kernel_size: 1 806 | weight_filler { 807 | type: "xavier" 808 | } 809 | bias_filler { 810 | type: "constant" 811 | value: 0.2 812 | } 813 | } 814 | } 815 | layer { 816 | name: "inception_2b/relu_3x3_reduce" 817 | type: "ReLU" 818 | bottom: "inception_2b/3x3_reduce" 819 | top: 
"inception_2b/3x3_reduce" 820 | } 821 | layer { 822 | name: "inception_2b/3x3" 823 | type: "Convolution" 824 | bottom: "inception_2b/3x3_reduce" 825 | top: "inception_2b/3x3" 826 | param { 827 | lr_mult: 0 828 | decay_mult: 0 829 | } 830 | param { 831 | lr_mult: 0 832 | decay_mult: 0 833 | } 834 | convolution_param { 835 | num_output: 128 836 | pad: 1 837 | kernel_size: 3 838 | stride: 2 839 | weight_filler { 840 | type: "xavier" 841 | } 842 | bias_filler { 843 | type: "constant" 844 | value: 0.2 845 | } 846 | } 847 | } 848 | layer { 849 | name: "inception_2b/relu_3x3" 850 | type: "ReLU" 851 | bottom: "inception_2b/3x3" 852 | top: "inception_2b/3x3" 853 | } 854 | layer { 855 | name: "inception_2b/double_3x3_reduce" 856 | type: "Convolution" 857 | bottom: "inception_2a/output" 858 | top: "inception_2b/double_3x3_reduce" 859 | param { 860 | lr_mult: 0 861 | decay_mult: 0 862 | } 863 | param { 864 | lr_mult: 0 865 | decay_mult: 0 866 | } 867 | convolution_param { 868 | num_output: 128 869 | kernel_size: 1 870 | weight_filler { 871 | type: "xavier" 872 | } 873 | bias_filler { 874 | type: "constant" 875 | value: 0.2 876 | } 877 | } 878 | } 879 | layer { 880 | name: "inception_2b/relu_double_3x3_reduce" 881 | type: "ReLU" 882 | bottom: "inception_2b/double_3x3_reduce" 883 | top: "inception_2b/double_3x3_reduce" 884 | } 885 | layer { 886 | name: "inception_2b/double_3x3_1" 887 | type: "Convolution" 888 | bottom: "inception_2b/double_3x3_reduce" 889 | top: "inception_2b/double_3x3_1" 890 | param { 891 | lr_mult: 0 892 | decay_mult: 0 893 | } 894 | param { 895 | lr_mult: 0 896 | decay_mult: 0 897 | } 898 | convolution_param { 899 | num_output: 128 900 | pad: 1 901 | kernel_size: 3 902 | weight_filler { 903 | type: "xavier" 904 | } 905 | bias_filler { 906 | type: "constant" 907 | value: 0.2 908 | } 909 | } 910 | } 911 | layer { 912 | name: "inception_2b/relu_double_3x3_1" 913 | type: "ReLU" 914 | bottom: "inception_2b/double_3x3_1" 915 | top: "inception_2b/double_3x3_1" 916 | } 917 | layer { 918 | name: "inception_2b/double_3x3_2" 919 | type: "Convolution" 920 | bottom: "inception_2b/double_3x3_1" 921 | top: "inception_2b/double_3x3_2" 922 | param { 923 | lr_mult: 0 924 | decay_mult: 0 925 | } 926 | param { 927 | lr_mult: 0 928 | decay_mult: 0 929 | } 930 | convolution_param { 931 | num_output: 128 932 | pad: 1 933 | kernel_size: 3 934 | stride: 2 935 | weight_filler { 936 | type: "xavier" 937 | } 938 | bias_filler { 939 | type: "constant" 940 | value: 0.2 941 | } 942 | } 943 | } 944 | layer { 945 | name: "inception_2b/relu_double_3x3_2" 946 | type: "ReLU" 947 | bottom: "inception_2b/double_3x3_2" 948 | top: "inception_2b/double_3x3_2" 949 | } 950 | layer { 951 | name: "inception_2b/pool" 952 | type: "Pooling" 953 | bottom: "inception_2a/output" 954 | top: "inception_2b/pool" 955 | pooling_param { 956 | pool: MAX 957 | kernel_size: 3 958 | stride: 2 959 | } 960 | } 961 | layer { 962 | name: "inception_2b/output" 963 | type: "Concat" 964 | bottom: "inception_2b/3x3" 965 | bottom: "inception_2b/double_3x3_2" 966 | bottom: "inception_2b/pool" 967 | top: "inception_2b/output" 968 | } 969 | layer { 970 | name: "inception_3a/1x1" 971 | type: "Convolution" 972 | bottom: "inception_2b/output" 973 | top: "inception_3a/1x1" 974 | param { 975 | lr_mult: 0 976 | decay_mult: 0 977 | } 978 | param { 979 | lr_mult: 0 980 | decay_mult: 0 981 | } 982 | convolution_param { 983 | num_output: 256 984 | kernel_size: 1 985 | weight_filler { 986 | type: "xavier" 987 | } 988 | bias_filler { 989 | type: "constant" 990 | 
value: 0.2 991 | } 992 | } 993 | } 994 | layer { 995 | name: "inception_3a/relu_1x1" 996 | type: "ReLU" 997 | bottom: "inception_3a/1x1" 998 | top: "inception_3a/1x1" 999 | } 1000 | layer { 1001 | name: "inception_3a/3x3_reduce" 1002 | type: "Convolution" 1003 | bottom: "inception_2b/output" 1004 | top: "inception_3a/3x3_reduce" 1005 | param { 1006 | lr_mult: 0 1007 | decay_mult: 0 1008 | } 1009 | param { 1010 | lr_mult: 0 1011 | decay_mult: 0 1012 | } 1013 | convolution_param { 1014 | num_output: 256 1015 | kernel_size: 1 1016 | weight_filler { 1017 | type: "xavier" 1018 | } 1019 | bias_filler { 1020 | type: "constant" 1021 | value: 0.2 1022 | } 1023 | } 1024 | } 1025 | layer { 1026 | name: "inception_3a/relu_3x3_reduce" 1027 | type: "ReLU" 1028 | bottom: "inception_3a/3x3_reduce" 1029 | top: "inception_3a/3x3_reduce" 1030 | } 1031 | layer { 1032 | name: "inception_3a/3x3" 1033 | type: "Convolution" 1034 | bottom: "inception_3a/3x3_reduce" 1035 | top: "inception_3a/3x3" 1036 | param { 1037 | lr_mult: 0 1038 | decay_mult: 0 1039 | } 1040 | param { 1041 | lr_mult: 0 1042 | decay_mult: 0 1043 | } 1044 | convolution_param { 1045 | num_output: 256 1046 | pad: 1 1047 | kernel_size: 3 1048 | weight_filler { 1049 | type: "xavier" 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | value: 0.2 1054 | } 1055 | } 1056 | } 1057 | layer { 1058 | name: "inception_3a/relu_3x3" 1059 | type: "ReLU" 1060 | bottom: "inception_3a/3x3" 1061 | top: "inception_3a/3x3" 1062 | } 1063 | layer { 1064 | name: "inception_3a/double_3x3_reduce" 1065 | type: "Convolution" 1066 | bottom: "inception_2b/output" 1067 | top: "inception_3a/double_3x3_reduce" 1068 | param { 1069 | lr_mult: 0 1070 | decay_mult: 0 1071 | } 1072 | param { 1073 | lr_mult: 0 1074 | decay_mult: 0 1075 | } 1076 | convolution_param { 1077 | num_output: 256 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "xavier" 1081 | } 1082 | bias_filler { 1083 | type: "constant" 1084 | value: 0.2 1085 | } 1086 | } 1087 | } 1088 | layer { 1089 | name: "inception_3a/relu_double_3x3_reduce" 1090 | type: "ReLU" 1091 | bottom: "inception_3a/double_3x3_reduce" 1092 | top: "inception_3a/double_3x3_reduce" 1093 | } 1094 | layer { 1095 | name: "inception_3a/double_3x3_1" 1096 | type: "Convolution" 1097 | bottom: "inception_3a/double_3x3_reduce" 1098 | top: "inception_3a/double_3x3_1" 1099 | param { 1100 | lr_mult: 0 1101 | decay_mult: 0 1102 | } 1103 | param { 1104 | lr_mult: 0 1105 | decay_mult: 0 1106 | } 1107 | convolution_param { 1108 | num_output: 256 1109 | pad: 1 1110 | kernel_size: 3 1111 | weight_filler { 1112 | type: "xavier" 1113 | } 1114 | bias_filler { 1115 | type: "constant" 1116 | value: 0.2 1117 | } 1118 | } 1119 | } 1120 | layer { 1121 | name: "inception_3a/relu_double_3x3_1" 1122 | type: "ReLU" 1123 | bottom: "inception_3a/double_3x3_1" 1124 | top: "inception_3a/double_3x3_1" 1125 | } 1126 | layer { 1127 | name: "inception_3a/double_3x3_2" 1128 | type: "Convolution" 1129 | bottom: "inception_3a/double_3x3_1" 1130 | top: "inception_3a/double_3x3_2" 1131 | param { 1132 | lr_mult: 0 1133 | decay_mult: 0 1134 | } 1135 | param { 1136 | lr_mult: 0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 256 1141 | pad: 1 1142 | kernel_size: 3 1143 | weight_filler { 1144 | type: "xavier" 1145 | } 1146 | bias_filler { 1147 | type: "constant" 1148 | value: 0.2 1149 | } 1150 | } 1151 | } 1152 | layer { 1153 | name: "inception_3a/relu_double_3x3_2" 1154 | type: "ReLU" 1155 | bottom: "inception_3a/double_3x3_2" 1156 | top: 
"inception_3a/double_3x3_2" 1157 | } 1158 | layer { 1159 | name: "inception_3a/pool" 1160 | type: "Pooling" 1161 | bottom: "inception_2b/output" 1162 | top: "inception_3a/pool" 1163 | pooling_param { 1164 | pool: AVE 1165 | kernel_size: 3 1166 | stride: 1 1167 | pad: 1 1168 | } 1169 | } 1170 | layer { 1171 | name: "inception_3a/pool_proj" 1172 | type: "Convolution" 1173 | bottom: "inception_3a/pool" 1174 | top: "inception_3a/pool_proj" 1175 | param { 1176 | lr_mult: 0 1177 | decay_mult: 0 1178 | } 1179 | param { 1180 | lr_mult: 0 1181 | decay_mult: 0 1182 | } 1183 | convolution_param { 1184 | num_output: 256 1185 | kernel_size: 1 1186 | weight_filler { 1187 | type: "xavier" 1188 | } 1189 | bias_filler { 1190 | type: "constant" 1191 | value: 0.2 1192 | } 1193 | } 1194 | } 1195 | layer { 1196 | name: "inception_3a/relu_pool_proj" 1197 | type: "ReLU" 1198 | bottom: "inception_3a/pool_proj" 1199 | top: "inception_3a/pool_proj" 1200 | } 1201 | layer { 1202 | name: "inception_3a/output" 1203 | type: "Concat" 1204 | bottom: "inception_3a/1x1" 1205 | bottom: "inception_3a/3x3" 1206 | bottom: "inception_3a/double_3x3_2" 1207 | bottom: "inception_3a/pool_proj" 1208 | top: "inception_3a/output" 1209 | } 1210 | layer { 1211 | name: "inception_3b/3x3_reduce" 1212 | type: "Convolution" 1213 | bottom: "inception_3a/output" 1214 | top: "inception_3b/3x3_reduce" 1215 | param { 1216 | lr_mult: 0 1217 | decay_mult: 0 1218 | } 1219 | param { 1220 | lr_mult: 0 1221 | decay_mult: 0 1222 | } 1223 | convolution_param { 1224 | num_output: 256 1225 | kernel_size: 1 1226 | weight_filler { 1227 | type: "xavier" 1228 | } 1229 | bias_filler { 1230 | type: "constant" 1231 | value: 0.2 1232 | } 1233 | } 1234 | } 1235 | layer { 1236 | name: "inception_3b/relu_3x3_reduce" 1237 | type: "ReLU" 1238 | bottom: "inception_3b/3x3_reduce" 1239 | top: "inception_3b/3x3_reduce" 1240 | } 1241 | layer { 1242 | name: "inception_3b/3x3" 1243 | type: "Convolution" 1244 | bottom: "inception_3b/3x3_reduce" 1245 | top: "inception_3b/3x3" 1246 | param { 1247 | lr_mult: 0 1248 | decay_mult: 0 1249 | } 1250 | param { 1251 | lr_mult: 0 1252 | decay_mult: 0 1253 | } 1254 | convolution_param { 1255 | num_output: 256 1256 | pad: 1 1257 | kernel_size: 3 1258 | stride: 2 1259 | weight_filler { 1260 | type: "xavier" 1261 | } 1262 | bias_filler { 1263 | type: "constant" 1264 | value: 0.2 1265 | } 1266 | } 1267 | } 1268 | layer { 1269 | name: "inception_3b/relu_3x3" 1270 | type: "ReLU" 1271 | bottom: "inception_3b/3x3" 1272 | top: "inception_3b/3x3" 1273 | } 1274 | layer { 1275 | name: "inception_3b/double_3x3_reduce" 1276 | type: "Convolution" 1277 | bottom: "inception_3a/output" 1278 | top: "inception_3b/double_3x3_reduce" 1279 | param { 1280 | lr_mult: 0 1281 | decay_mult: 0 1282 | } 1283 | param { 1284 | lr_mult: 0 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 256 1289 | kernel_size: 1 1290 | weight_filler { 1291 | type: "xavier" 1292 | } 1293 | bias_filler { 1294 | type: "constant" 1295 | value: 0.2 1296 | } 1297 | } 1298 | } 1299 | layer { 1300 | name: "inception_3b/relu_double_3x3_reduce" 1301 | type: "ReLU" 1302 | bottom: "inception_3b/double_3x3_reduce" 1303 | top: "inception_3b/double_3x3_reduce" 1304 | } 1305 | layer { 1306 | name: "inception_3b/double_3x3_1" 1307 | type: "Convolution" 1308 | bottom: "inception_3b/double_3x3_reduce" 1309 | top: "inception_3b/double_3x3_1" 1310 | param { 1311 | lr_mult: 0 1312 | decay_mult: 0 1313 | } 1314 | param { 1315 | lr_mult: 0 1316 | decay_mult: 0 1317 | } 1318 | 
convolution_param { 1319 | num_output: 256 1320 | pad: 1 1321 | kernel_size: 3 1322 | weight_filler { 1323 | type: "xavier" 1324 | } 1325 | bias_filler { 1326 | type: "constant" 1327 | value: 0.2 1328 | } 1329 | } 1330 | } 1331 | layer { 1332 | name: "inception_3b/relu_double_3x3_1" 1333 | type: "ReLU" 1334 | bottom: "inception_3b/double_3x3_1" 1335 | top: "inception_3b/double_3x3_1" 1336 | } 1337 | layer { 1338 | name: "inception_3b/double_3x3_2" 1339 | type: "Convolution" 1340 | bottom: "inception_3b/double_3x3_1" 1341 | top: "inception_3b/double_3x3_2" 1342 | param { 1343 | lr_mult: 0 1344 | decay_mult: 0 1345 | } 1346 | param { 1347 | lr_mult: 0 1348 | decay_mult: 0 1349 | } 1350 | convolution_param { 1351 | num_output: 256 1352 | pad: 1 1353 | kernel_size: 3 1354 | stride: 2 1355 | weight_filler { 1356 | type: "xavier" 1357 | } 1358 | bias_filler { 1359 | type: "constant" 1360 | value: 0.2 1361 | } 1362 | } 1363 | } 1364 | layer { 1365 | name: "inception_3b/relu_double_3x3_2" 1366 | type: "ReLU" 1367 | bottom: "inception_3b/double_3x3_2" 1368 | top: "inception_3b/double_3x3_2" 1369 | } 1370 | layer { 1371 | name: "inception_3b/pool" 1372 | type: "Pooling" 1373 | bottom: "inception_3a/output" 1374 | top: "inception_3b/pool" 1375 | pooling_param { 1376 | pool: MAX 1377 | kernel_size: 3 1378 | stride: 2 1379 | pad_h: 0 1380 | pad_w: 1 1381 | } 1382 | } 1383 | layer { 1384 | name: "inception_3b/output" 1385 | type: "Concat" 1386 | bottom: "inception_3b/3x3" 1387 | bottom: "inception_3b/double_3x3_2" 1388 | bottom: "inception_3b/pool" 1389 | top: "inception_3b/output" 1390 | } 1391 | layer { 1392 | name: "global_pool" 1393 | type: "Pooling" 1394 | bottom: "inception_3b/output" 1395 | top: "global_pool" 1396 | pooling_param { 1397 | pool: AVE 1398 | stride: 1 1399 | kernel_h: 9 1400 | kernel_w: 4 1401 | } 1402 | } 1403 | layer { 1404 | name: "fc7" 1405 | type: "InnerProduct" 1406 | bottom: "global_pool" 1407 | top: "fc7" 1408 | param { 1409 | lr_mult: 0 1410 | decay_mult: 0 1411 | } 1412 | param { 1413 | lr_mult: 0 1414 | decay_mult: 0 1415 | } 1416 | inner_product_param { 1417 | num_output: 256 1418 | weight_filler { 1419 | type: "xavier" 1420 | } 1421 | bias_filler { 1422 | type: "constant" 1423 | value: 0.0 1424 | } 1425 | } 1426 | } 1427 | layer { 1428 | name: "relu7" 1429 | type: "ReLU" 1430 | bottom: "fc7" 1431 | top: "fc7" 1432 | } 1433 | layer { 1434 | name: "drop7" 1435 | type: "Dropout" 1436 | bottom: "fc7" 1437 | top: "fc7" 1438 | dropout_param { 1439 | dropout_ratio: 0.5 1440 | } 1441 | } 1442 | 1443 | ####################################### loss accuracy ####################################### 1444 | layer { name: "fc8_prid" type: "InnerProduct" bottom: "fc7" top: "fc8_prid" 1445 | param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } 1446 | inner_product_param { num_output: 385 1447 | weight_filler { type: "gaussian" std: 0.001 } 1448 | bias_filler { type: "constant" value: 0 } } } 1449 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_prid" bottom: "label" top: "loss" } 1450 | layer { name: "accuracy" type: "Accuracy" bottom: "fc8_prid" bottom: "label" top: "accuracy" 1451 | include { phase: TEST } } 1452 | --------------------------------------------------------------------------------
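Note on the two trainval definitions above: every param block in the trunk sets lr_mult: 0 and decay_mult: 0, so the convolutional and inception layers plus fc7 are completely frozen; in this fc_only stage only the dataset-specific classifier (fc8_ilids with 119 identities, fc8_prid with 385) is trained on top of the fixed 256-d fc7 embedding. The global_pool kernel of 9x4 follows from the 144x56 input crop: the three stride-1 convolutions preserve 144x56, pool1 halves it to 72x28, and the stride-2 inception blocks 1b, 2b, and 3b halve it again to 36x14, 18x7, and finally 9x4.

Training stages are normally orchestrated by the shell scripts under scripts/; what follows is only a minimal pycaffe sketch of the underlying calls, assuming the Cysu/caffe fork from external/caffe is built with pycaffe on PYTHONPATH (the Data layer fields crop_height, crop_width, and shuffle_pool_size exist only in that fork, not in upstream Caffe). The pretrained-weights path passed to copy_from is a placeholder for illustration, not a repo artifact.

    # Minimal driver for one fc_only stage (sketch; paths are assumptions).
    import caffe

    caffe.set_device(0)   # solver_mode: GPU is set in the solver prototxt
    caffe.set_mode_gpu()

    # Parse the solver, which in turn loads models/fc_only/prid_trainval.prototxt.
    solver = caffe.SGDSolver('models/fc_only/prid_solver.prototxt')

    # Initialize the frozen trunk from previously trained weights before
    # learning the new fc8_prid classifier; this snapshot path is hypothetical.
    solver.net.copy_from('external/exp/snapshots/jstl/pretrained.caffemodel')

    # Run max_iter SGD steps, snapshotting under the solver's snapshot_prefix.
    solver.solve()

Because the trunk's lr_mult values are zero, backpropagation updates only the fc8 classifier weights here; the same mechanism is what lets the repository reuse one shared feature extractor across the per-dataset trainval files while swapping in a differently sized softmax head for each dataset.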