├── .gitignore ├── .gitmodules ├── data ├── .gitignore └── README.md ├── experiments ├── .gitignore └── scripts │ ├── archive │ ├── collect_ibm_attributes_soft_labels.sh │ ├── ibm_simple_test.sh │ ├── test_cluster.sh │ ├── train_cls_branch_scratch.sh │ ├── train_joint_entropyloss.sh │ ├── train_joint_squareloss.sh │ ├── train_test_quad.sh │ ├── train_test_quad_smart.sh │ ├── traintest_binary.sh │ ├── traintest_ecm_dis.sh │ ├── traintest_ecm_sim.sh │ ├── traintest_hairhatbald.sh │ ├── traintest_ibm_attributes.sh │ └── traintest_randbin.sh │ ├── ccc.txt │ ├── ccc_baseline_single_celeba_submit.sh │ ├── ccc_submit_baselines.sh │ ├── ccc_submit_branching.sh │ ├── ccc_submit_deepfashion.sh │ ├── ccc_submit_regroup.sh │ ├── test_cls.sh │ ├── test_cls_round.sh │ ├── test_cls_round_topk.sh │ ├── test_cls_topk.sh │ ├── train_baseline_celeba.sh │ ├── train_baseline_deepfashion.sh │ ├── train_baseline_scratch_celeba.sh │ ├── train_baseline_scratch_deepfashion.sh │ ├── train_baseline_single_celeba.sh │ ├── train_branch_celeba.sh │ ├── train_branch_deepfashion.sh │ ├── train_branch_person.sh │ └── train_prototxt.sh ├── lib ├── README.md ├── __init__.py ├── datasets │ ├── IBMattributes.py │ ├── __init__.py │ ├── celeba.py │ ├── celeba_plus_webcam_cls.py │ ├── datasets.py │ ├── deepfashion.py │ ├── factory.py │ ├── imdb.py │ └── personattr.py ├── evaluation │ ├── __init__.py │ ├── cluster.py │ └── test.py ├── layers │ ├── __init__.py │ ├── classification_data.py │ ├── multilabel_data.py │ ├── multilabel_err.py │ └── singlelabel_data.py ├── models │ ├── __init__.py │ ├── branch_5-layer-second │ │ ├── test.prototxt │ │ └── train_val.prototxt │ ├── branch_5-layer │ │ ├── test.prototxt │ │ └── train_val.prototxt │ ├── default_5-layer │ │ ├── test.prototxt │ │ └── train_val.prototxt │ ├── factory.py │ ├── layer_helpers.py │ ├── model_io.py │ ├── models_low_rank.py │ ├── modulo_row.py │ └── netmodel.py ├── solvers │ ├── __init__.py │ ├── classification_sw.py │ └── solver.py └── utils │ ├── __init__.py │ ├── blob.py │ ├── config.py │ ├── convertBN.py │ ├── error.py │ ├── holder.py │ ├── log.py │ ├── loss.png │ ├── somp.py │ ├── svd.py │ ├── test_parse.txt │ └── timer.py ├── models ├── .gitignore ├── hairhatbald │ ├── solver.prototxt │ └── train_val.prototxt ├── joint_entropy_loss │ ├── solver.prototxt │ ├── test.prototxt │ └── train_val.prototxt └── joint_square_loss │ ├── solver.prototxt │ ├── test.prototxt │ └── train_val.prototxt └── tools ├── _init_paths.py ├── convert_bn.py ├── convert_model.py ├── ibm_simple_test.py ├── load_person.py ├── parse_log.py ├── parse_log_and_save.py ├── pixel_means.py ├── save_softlabels.py ├── test_cls.py ├── test_cluster.py └── train_cls.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *~ 3 | /*.sh 4 | data 5 | output 6 | caffe 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe"] 2 | path = caffe 3 | url = https://github.com/BVLC/caffe 4 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | imdb* 2 | models* 3 | cache 4 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | All datasets starts with "imdb". 2 | All models starts with "models". 3 | Cache files used in training are stored in "cache". 4 | 5 | -------------------------------------------------------------------------------- /experiments/.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | -------------------------------------------------------------------------------- /experiments/scripts/archive/collect_ibm_attributes_soft_labels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | test_class=$1 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | LOG="../logs/soft_labels-$1.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 11 | exec &> >(tee -a "$LOG") 12 | echo Logging output to "$LOG" 13 | 14 | cd ../.. 15 | 16 | time ./tools/save_softlabels.py --gpu 0 \ 17 | --imdb IBMattributes_train \ 18 | --test_class $test_class -------------------------------------------------------------------------------- /experiments/scripts/archive/ibm_simple_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="../logs/ibm_simple_test.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | cd ../.. 13 | time ./tools/ibm_simple_test.py --gpu 0 \ 14 | --model data/imdb_IBMAttributes/Ethnicity/ethnicitySecondRound.prototxt \ 15 | --weights data/imdb_IBMAttributes/Ethnicity/ethnicitySecondRound.caffemodel \ 16 | --folders Asian Black White 17 | 18 | time ./tools/ibm_simple_test.py --gpu 0 \ 19 | --model data/imdb_IBMAttributes/HairHatBald/hairhatbaldSecondRound.prototxt \ 20 | --weights data/imdb_IBMAttributes/HairHatBald/hairhatbaldSecondRound.caffemodel \ 21 | --folders Bald Hat Hair 22 | 23 | time ./tools/ibm_simple_test.py --gpu 0 \ 24 | --model data/imdb_IBMAttributes/HairColor/haircolor.prototxt \ 25 | --weights data/imdb_IBMAttributes/HairColor/haircolor.caffemodel \ 26 | --folders Blackhair Blondehair 27 | 28 | time ./tools/ibm_simple_test.py --gpu 0 \ 29 | --model data/imdb_IBMAttributes/FacialHair/facialhair.prototxt \ 30 | --weights data/imdb_IBMAttributes/FacialHair/facialhair.caffemodel \ 31 | --folders FacialHair NoFacialHair 32 | 33 | time ./tools/ibm_simple_test.py --gpu 0 \ 34 | --model data/imdb_IBMAttributes/Glasses/SunEyeNoGlasses.prototxt \ 35 | --weights data/imdb_IBMAttributes/Glasses/SunEyeNoGlasses.caffemodel \ 36 | --folders NoGlasses SunGlasses VisionGlasses -------------------------------------------------------------------------------- /experiments/scripts/archive/test_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | num=$1 4 | method=$2 5 | 6 | set -x 7 | set -e 8 | 9 | export PYTHONUNBUFFERED="True" 10 | 11 | LOG="../logs/test_cluster_${num}_${method}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 12 | exec &> >(tee -a "$LOG") 13 | echo Logging output to "$LOG" 14 | 15 | cd ../.. 16 | time ./tools/test_cluster.py --gpu 0 \ 17 | --model /dccstor/luyo1/multi-task-output/narrow-low-vgg-16-binary-0/celeba_train/prototxt/test.prototxt \ 18 | --weights /dccstor/luyo1/multi-task-output/narrow-low-vgg-16-binary-0/celeba_train/celeba_binary-0_iter_20000.caffemodel \ 19 | --imdb celeba_val \ 20 | --n_cluster $num \ 21 | --method $method -------------------------------------------------------------------------------- /experiments/scripts/archive/train_cls_branch_scratch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | rounds=$6 9 | aff_type=$7 10 | 11 | set -x 12 | set -e 13 | 14 | export PYTHONUNBUFFERED="True" 15 | 16 | LOG="../logs/train_dynamic_branch_scratch_${aff_type}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 17 | exec &> >(tee -a "$LOG") 18 | echo Logging output to "$LOG" 19 | 20 | cd ../.. 21 | 22 | time ./tools/train_cls.py --gpu 0 \ 23 | --traindb celeba_train \ 24 | --valdb celeba_val \ 25 | --iters ${iters} \ 26 | --base_lr ${base_lr} \ 27 | --clip_gradients 20 \ 28 | --loss Sigmoid \ 29 | --model ${model} \ 30 | --last_low_rank ${last_low_rank} \ 31 | --use_svd \ 32 | --exp ${model}-branch-scratch-${last_low_rank}-${aff_type} \ 33 | --num_rounds ${rounds} \ 34 | --stepsize ${stepsize} \ 35 | --aff_type ${aff_type} \ 36 | --share_basis \ 37 | --use_bn 38 | -------------------------------------------------------------------------------- /experiments/scripts/archive/train_joint_entropyloss.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="../logs/train_joint_entropyloss.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | cd ../.. 13 | time ./tools/train_cls.py --gpu 0 \ 14 | --solver models/joint_entropy_loss/solver.prototxt \ 15 | --weights data/pretrained/gender.caffemodel \ 16 | --traindb celeba_train \ 17 | --valdb celeba_val \ 18 | --iters 80000 \ 19 | --exp joint_entropy 20 | 21 | -------------------------------------------------------------------------------- /experiments/scripts/archive/train_joint_squareloss.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="../logs/train_joint_squareloss.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | cd ../.. 13 | time ./tools/train_multi.py --gpu 0 \ 14 | --solver models/joint_square_loss/solver.prototxt \ 15 | --weights data/pretrained/gender.caffemodel \ 16 | --traindb celeba_train \ 17 | --valdb celeba_val \ 18 | --exp joint_square \ 19 | --iters 80000 20 | -------------------------------------------------------------------------------- /experiments/scripts/archive/train_test_quad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_quad_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-quad-$cut_depth \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[0,1,2,3,4,5,6,7,8,9],[10,11,12,13,14,15,16,17,18,19],[20,21,22,23,24,25,26,27,28,29],[30,31,32,33,34,35,36,37,38,39]] \ 33 | --share_basis \ 34 | --use_bn 35 | 36 | time ./tools/test_cls.py --gpu 0 \ 37 | --model output/$model-quad-${cut_depth}/celeba_train/prototxt/test.prototxt \ 38 | --weights output/$model-quad-${cut_depth}/celeba_train/celeba_quad-${cut_depth}_iter_${iters}.caffemodel \ 39 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/archive/train_test_quad_smart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_quadsmart_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-quadsmart-$cut_depth \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[0,4,5,8,9,10,12,13,14,15,16,17,19,20,21,22,23,24,26,28,29,30,31,35,38],[2,3,7,39],[25,32],[1,6,11,18,27,33,34,36,37]] \ 33 | --share_basis \ 34 | --use_bn 35 | 36 | time ./tools/test_cls.py --gpu 0 \ 37 | --model output/$model-quadsmart-${cut_depth}/celeba_train/prototxt/test.prototxt \ 38 | --weights output/$model-quadsmart-${cut_depth}/celeba_train/celeba_quadsmart-${cut_depth}_iter_${iters}.caffemodel \ 39 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_binary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_binary_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-binary-$cut_depth \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[0,1,4,6,9,14,16,19,20,21,22,24,25,29,30,31,32,33,35,38],[2,3,5,7,8,10,11,12,13,15,17,18,23,26,27,28,34,36,37,39]] \ 33 | --share_basis 34 | 35 | time ./tools/test_cls.py --gpu 0 \ 36 | --model output/$model-binary-${cut_depth}/celeba_train/prototxt/test.prototxt \ 37 | --weights output/$model-binary-${cut_depth}/celeba_train/celeba_binary-${cut_depth}_iter_${iters}.caffemodel \ 38 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_ecm_dis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_ecmdis_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-ecmdis-$cut_depth \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[0,1,4,6,9,14,16,19,20,21,22,24,25,29,30,31,32,33,35,38],[2,3,5,7,8,10,11,12,13,15,17,18,23,26,27,28,34,36,37,39]] \ 33 | --share_basis 34 | 35 | time ./tools/test_cls.py --gpu 0 \ 36 | --model output/$model-ecmdis-${cut_depth}/celeba_train/prototxt/test.prototxt \ 37 | --weights output/$model-ecmdis-${cut_depth}/celeba_train/celeba_ecmdis-${cut_depth}_iter_${iters}.caffemodel \ 38 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_ecm_sim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_ecmsim_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-ecmsim-$cut_depth \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[1,2,6,11,18,25,27,32,33,34,37],[0,3,4,5,7,8,9,10,12,13,14,15,16,17,19,20,21,22,23,24,26,28,29,30,31,35,36,38,39]] \ 33 | --share_basis 34 | 35 | time ./tools/test_cls.py --gpu 0 \ 36 | --model output/$model-ecmsim-${cut_depth}/celeba_train/prototxt/test.prototxt \ 37 | --weights output/$model-ecmsim-${cut_depth}/celeba_train/celeba_ecmsim-${cut_depth}_iter_${iters}.caffemodel \ 38 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_hairhatbald.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | last_low_rank=$1 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | LOG="../logs/traintest_hairhatbald-$1.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 11 | exec &> >(tee -a "$LOG") 12 | echo Logging output to "$LOG" 13 | 14 | cd ../.. 15 | 16 | time ./tools/train_cls.py --gpu 0 \ 17 | --traindb celeba_plus_webcam_cls_train \ 18 | --valdb celeba_plus_webcam_cls_val \ 19 | --iters 10000 \ 20 | --base_lr 0.001 \ 21 | --clip_gradients 20 \ 22 | --stepsize 6000 \ 23 | --model low-vgg-16 \ 24 | --loss Softmax \ 25 | --last_low_rank $1 \ 26 | --use_svd \ 27 | --exp low-vgg-16-hairhatbald-$1 \ 28 | --weights data/pretrained/hairhatbald.caffemodel \ 29 | --task_name singlelabel 30 | 31 | time ./tools/test_cls.py --gpu 0 \ 32 | --model output/low-vgg-16-hairhatbald-$1/celeba_plus_webcam_cls_train/prototxt/test.prototxt \ 33 | --weights output/low-vgg-16-hairhatbald-$1/celeba_plus_webcam_cls_train/low-vgg-16-hairhatbald-$1_iter_10000.caffemodel \ 34 | --task_name singlelabel \ 35 | --imdb celeba_plus_webcam_cls_val -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_ibm_attributes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | last_low_rank=$1 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | LOG="../logs/traintest_ibmattributes-$1.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 11 | exec &> >(tee -a "$LOG") 12 | echo Logging output to "$LOG" 13 | 14 | cd ../.. 15 | 16 | time ./tools/save_softlabels.py --gpu 0 \ 17 | --imdb IBMattributes_train 18 | 19 | time ./tools/train_cls.py --gpu 0 \ 20 | --traindb IBMattributes_train \ 21 | --valdb IBMattributes_val \ 22 | --iters 20000 \ 23 | --base_lr 0.001 \ 24 | --clip_gradients 20 \ 25 | --stepsize 16000 \ 26 | --model low-vgg-16 \ 27 | --loss Sigmoid \ 28 | --last_low_rank $1 \ 29 | --use_svd \ 30 | --exp low-vgg-16-ibmattributes-$1 \ 31 | --weights data/pretrained/gender.caffemodel \ 32 | 33 | time ./tools/test_cls.py --gpu 0 \ 34 | --model output/low-vgg-16-ibmattributes-$1/IBMattributes_train/prototxt/test.prototxt \ 35 | --weights output/low-vgg-16-ibmattributes-$1/IBMattributes_train/low-vgg-16-ibmattributes-$1_iter_20000.caffemodel \ 36 | --imdb IBMattributes_val -------------------------------------------------------------------------------- /experiments/scripts/archive/traintest_randbin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | model=$2 5 | cut_depth=$3 6 | 7 | set -x 8 | set -e 9 | 10 | export PYTHONUNBUFFERED="True" 11 | 12 | LOG="../logs/traintest_randbin_${model}_${cut_depth}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 13 | exec &> >(tee -a "$LOG") 14 | echo Logging output to "$LOG" 15 | 16 | cd ../.. 17 | 18 | time ./tools/train_cls.py --gpu 0 \ 19 | --traindb celeba_train \ 20 | --valdb celeba_val \ 21 | --iters $iters \ 22 | --base_lr 0.001 \ 23 | --clip_gradients 20 \ 24 | --loss Sigmoid \ 25 | --model $model \ 26 | --last_low_rank 16 \ 27 | --use_svd \ 28 | --stepsize 8000 \ 29 | --exp $model-randbin-${cut_depth} \ 30 | --weights data/pretrained/gender.caffemodel \ 31 | --cut_depth ${cut_depth} \ 32 | --cut_points [[11,12,15,16,17,6,7,8,10,13,14,19,24,28,34,35,37,38],[0,1,2,3,4,5,9,18,20,21,22,23,25,26,27,29,30,31,32,33,36,39]] \ 33 | --share_basis 34 | 35 | time ./tools/test_cls.py --gpu 0 \ 36 | --model output/$model-randbin-${cut_depth}/celeba_train/prototxt/test.prototxt \ 37 | --weights output/$model-randbin-${cut_depth}/celeba_train/celeba_randbin-${cut_depth}_iter_${iters}.caffemodel \ 38 | --imdb celeba_test -------------------------------------------------------------------------------- /experiments/scripts/ccc.txt: -------------------------------------------------------------------------------- 1 | jbsub -mem 3g -mail -cores 1+1 -queue x86_short ./train_joint_entropyloss.sh 2 | 3 | 4 | # experiments with baseline using thin models 5 | ./train_baseline_celeba.sh 40000 16000 0.001 small32-lowvgg16 0 6 | ./train_baseline_deepfashion.sh 16000 20000 0.001 small32-lowvgg16 0 7 | 8 | # experiments with baseline using large models 9 | ./train_baseline_celeba.sh 40000 16000 0.001 lowvgg16 0 10 | ./train_baseline_deepfashion.sh 40000 16000 0.001 lowvgg16 0 11 | 12 | # experiments with single class 13 | ./train_baseline_single_celeba.sh 10000 8000 0.001 lowvgg16 0 ${cls_id} 14 | ./train_baseline_single_celeba.sh 10000 8000 0.001 small32-lowvgg16 0 ${cls_id} 15 | 16 | # experiments with baseline using low rank models 17 | ./train_baseline_celeba.sh 40000 16000 0.001 lowvgg16 16 18 | ./train_baseline_deepfashion.sh 40000 16000 0.001 lowvgg16 16 19 | 20 | # experiments with branching 21 | ./train_branch_celeba.sh 40000 16000 0.001 small32-lowvgg16 0 15 1000 1.0 22 | ./train_branch_deepfashion.sh 40000 16000 0.001 small32-lowvgg16 0 15 1000 1.0 23 | 24 | # experiments with joint dataset 25 | ./train_branch_personattr.sh 80000 32000 0.001 small32-lowvgg16 0 15 2000 1.0 26 | 27 | # TODO: for thin models, we probably could use more training iterations? -------------------------------------------------------------------------------- /experiments/scripts/ccc_baseline_single_celeba_submit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in `seq 0 39`; 4 | do 5 | jbsub -mem 10g -mail -cores 4+1 -queue x86_short ./train_baseline_single_celeba.sh 10000 8000 0.001 small32-lowvgg16 0 ${i} 6 | done 7 | 8 | for i in `seq 0 39`; 9 | do 10 | jbsub -mem 10g -mail -cores 4+1 -queue x86 ./train_baseline_single_celeba.sh 10000 8000 0.001 lowvgg16 0 ${i} 11 | done -------------------------------------------------------------------------------- /experiments/scripts/ccc_submit_baselines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | experiments with baseline using thin models 4 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 5 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 6 | 7 | experiments with baseline using thin models (from scratch) 8 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_scratch_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 9 | 10 | # experiments with baseline using large models 11 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_celeba.sh 40000 16000 0.001 lowvgg16 0 12 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_deepfashion.sh 40000 16000 0.001 lowvgg16 0 13 | 14 | # experiments with baseline using low rank models 15 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_celeba.sh 40000 16000 0.001 lowvgg16 16 16 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_deepfashion.sh 40000 16000 0.001 lowvgg16 16 17 | -------------------------------------------------------------------------------- /experiments/scripts/ccc_submit_branching.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments with branching on CelebA 4 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 1.0 5 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 2.0 6 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 3.0 7 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 4.0 8 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 1.0 9 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 0.0 10 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 0.5 11 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_celeba.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 2.0 12 | 13 | 14 | # # experiments with branching on DeepFashion 15 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 0.0 16 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 1.0 17 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 2.0 18 | # jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 2.0 19 | 20 | 21 | # # experiments with joint dataset 22 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_person.sh 100000 40000 0.001 small64-lowvgg16 0 15 2000 1.0 23 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_person.sh 100000 40000 0.001 small64-lowvgg16 0 15 2000 2.0 24 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_person.sh 100000 40000 0.001 small32-lowvgg16 0 15 2000 1.0 25 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_person.sh 100000 40000 0.001 small32-lowvgg16 0 15 2000 2.0 -------------------------------------------------------------------------------- /experiments/scripts/ccc_submit_deepfashion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # submit deepfashion experiments 4 | 5 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_baseline_deepfashion.sh 40000 16000 0.001 lowvgg16 16 6 | 7 | # # experiments with branching on DeepFashion 8 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 0.0 9 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 1.0 10 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 2.0 11 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 3.0 12 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small32-lowvgg16 0 15 1000 4.0 13 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 0.0 14 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 0.5 15 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 1.0 16 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_branch_deepfashion.sh 60000 20000 0.001 small64-lowvgg16 0 15 1000 2.0 17 | -------------------------------------------------------------------------------- /experiments/scripts/ccc_submit_regroup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_prototxt.sh 60000 output/celeba_small32-lowvgg16_0_15_1000_2.0/celeba_train/prototxt/round_11/solver.prototxt original [19,31,18,26,36,4,13,14,17,38,15,16,22,30,1,8,27,29,33,34,37,3,7,39,23,32,21,25,6,5,10,20,2,9,28,0,12,24,11,35] 4 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_prototxt.sh 60000 output/celeba_small32-lowvgg16_0_15_1000_2.0/celeba_train/prototxt/round_11/solver.prototxt rand1 [27,20,26,17,25,28,18,13,23,14,33,37,6,2,12,22,11,24,5,3,35,39,21,31,15,1,34,30,8,38,10,29,32,36,19,16,7,9,4,0] 5 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_prototxt.sh 60000 output/celeba_small32-lowvgg16_0_15_1000_2.0/celeba_train/prototxt/round_11/solver.prototxt rand2 [31,26,20,5,35,11,28,25,21,2,34,6,3,22,9,17,37,27,29,33,30,8,24,16,32,0,14,19,39,13,12,15,18,4,10,1,38,23,7,36] 6 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_prototxt.sh 60000 output/celeba_small32-lowvgg16_0_15_1000_2.0/celeba_train/prototxt/round_11/solver.prototxt rand3 [3,6,31,28,1,36,8,18,35,21,17,2,15,10,9,12,0,4,33,11,16,37,24,20,38,39,30,14,29,32,7,22,19,27,5,13,26,25,34,23] 7 | jbsub -mem 10g -cores 4+1 -queue x86 -mail ./train_prototxt.sh 60000 output/celeba_small32-lowvgg16_0_15_1000_2.0/celeba_train/prototxt/round_11/solver.prototxt inverse [37,3,7,39,23,32,21,25,6,5,10,20,2,9,28,0,12,24,11,35,19,31,18,26,36,4,13,14,17,38,15,16,22,30,1,8,27,29,33,34] -------------------------------------------------------------------------------- /experiments/scripts/test_cls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_imdb=$1 4 | test_imdb=$2 5 | datapath=$3 6 | 7 | if [ $# -eq 3 ] ; then 8 | outputpath="output" 9 | elif [ $# -eq 4 ] ; then 10 | outputpath=$4 11 | fi 12 | 13 | set -x 14 | set -e 15 | 16 | export PYTHONUNBUFFERED="True" 17 | 18 | LOG="../logs/test_model_${train_imdb}_${test_imdb}_${datapath}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 19 | exec &> >(tee -a "$LOG") 20 | echo Logging output to "$LOG" 21 | 22 | cd ../.. 23 | 24 | time ./tools/test_cls.py --gpu 0 \ 25 | --model ${outputpath}/${datapath}/${train_imdb}/prototxt/test.prototxt \ 26 | --weights ${outputpath}/${datapath}/${train_imdb}/deploy.caffemodel \ 27 | --imdb ${test_imdb} -------------------------------------------------------------------------------- /experiments/scripts/test_cls_round.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_imdb=$1 4 | test_imdb=$2 5 | datapath=$3 6 | round=$4 7 | 8 | if [ $# -eq 4 ] ; then 9 | outputpath="output" 10 | elif [ $# -eq 5 ] ; then 11 | outputpath=$5 12 | fi 13 | 14 | set -x 15 | set -e 16 | 17 | export PYTHONUNBUFFERED="True" 18 | 19 | LOG="../logs/test_model_${train_imdb}_${test_imdb}_${datapath}_${round}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 20 | exec &> >(tee -a "$LOG") 21 | echo Logging output to "$LOG" 22 | 23 | cd ../.. 24 | 25 | time ./tools/test_cls.py --gpu 0 \ 26 | --model ${outputpath}/${datapath}/${train_imdb}/prototxt/round_${round}/test.prototxt \ 27 | --weights ${outputpath}/${datapath}/${train_imdb}/round_${round}_deploy.caffemodel \ 28 | --imdb ${test_imdb} -------------------------------------------------------------------------------- /experiments/scripts/test_cls_round_topk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_imdb=$1 4 | test_imdb=$2 5 | datapath=$3 6 | round=$4 7 | k=$5 8 | 9 | if [ $# -eq 5 ] ; then 10 | outputpath="output" 11 | elif [ $# -eq 6 ] ; then 12 | outputpath=$6 13 | fi 14 | 15 | set -x 16 | set -e 17 | 18 | export PYTHONUNBUFFERED="True" 19 | 20 | LOG="../logs/test_model_${train_imdb}_${test_imdb}_${datapath}_${round}_top${k}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 21 | exec &> >(tee -a "$LOG") 22 | echo Logging output to "$LOG" 23 | 24 | cd ../.. 25 | 26 | time ./tools/test_cls.py --gpu 0 \ 27 | --model ${outputpath}/${datapath}/${train_imdb}/prototxt/round_${round}/test.prototxt \ 28 | --weights ${outputpath}/${datapath}/${train_imdb}/round_${round}_deploy.caffemodel \ 29 | --metric top-${k} \ 30 | --imdb ${test_imdb} -------------------------------------------------------------------------------- /experiments/scripts/test_cls_topk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_imdb=$1 4 | test_imdb=$2 5 | datapath=$3 6 | k=$4 7 | 8 | if [ $# -eq 4 ] ; then 9 | outputpath="output" 10 | elif [ $# -eq 5 ] ; then 11 | outputpath=$5 12 | fi 13 | 14 | set -x 15 | set -e 16 | 17 | export PYTHONUNBUFFERED="True" 18 | 19 | LOG="../logs/test_model_${train_imdb}_${test_imdb}_${datapath}_top${k}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 20 | exec &> >(tee -a "$LOG") 21 | echo Logging output to "$LOG" 22 | 23 | cd ../.. 24 | 25 | time ./tools/test_cls.py --gpu 0 \ 26 | --model ${outputpath}/${datapath}/${train_imdb}/prototxt/test.prototxt \ 27 | --weights ${outputpath}/${datapath}/${train_imdb}/deploy.caffemodel \ 28 | --metric top-${k} \ 29 | --imdb ${test_imdb} -------------------------------------------------------------------------------- /experiments/scripts/train_baseline_celeba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | 9 | set -x 10 | set -e 11 | 12 | export PYTHONUNBUFFERED="True" 13 | 14 | LOG="../logs/train_baseline_celeba_${model}_${last_low_rank}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 15 | exec &> >(tee -a "$LOG") 16 | echo Logging output to "$LOG" 17 | 18 | cd ../.. 19 | 20 | time ./tools/train_cls.py --gpu 0 \ 21 | --traindb celeba_train \ 22 | --valdb celeba_val \ 23 | --iters ${iters} \ 24 | --base_lr ${base_lr} \ 25 | --clip_gradients 20 \ 26 | --loss Sigmoid \ 27 | --model ${model} \ 28 | --last_low_rank ${last_low_rank} \ 29 | --use_svd \ 30 | --exp celeba_baseline_${model}_${last_low_rank} \ 31 | --max_rounds 1 \ 32 | --stepsize ${stepsize} \ 33 | --weights data/pretrained/gender.caffemodel \ 34 | --use_bn -------------------------------------------------------------------------------- /experiments/scripts/train_baseline_deepfashion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | 9 | set -x 10 | set -e 11 | 12 | export PYTHONUNBUFFERED="True" 13 | 14 | LOG="../logs/train_baseline_deepfashion_${model}_${last_low_rank}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 15 | exec &> >(tee -a "$LOG") 16 | echo Logging output to "$LOG" 17 | 18 | cd ../.. 19 | 20 | time ./tools/train_cls.py --gpu 0 \ 21 | --traindb deepfashion_train \ 22 | --valdb deepfashion_val \ 23 | --iters ${iters} \ 24 | --base_lr ${base_lr} \ 25 | --clip_gradients 20 \ 26 | --loss Sigmoid \ 27 | --model ${model} \ 28 | --last_low_rank ${last_low_rank} \ 29 | --use_svd \ 30 | --exp deepfashion_baseline_${model}_${last_low_rank} \ 31 | --max_rounds 1 \ 32 | --stepsize ${stepsize} \ 33 | --weights data/pretrained/gender.caffemodel \ 34 | --use_bn -------------------------------------------------------------------------------- /experiments/scripts/train_baseline_scratch_celeba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | 9 | set -x 10 | set -e 11 | 12 | export PYTHONUNBUFFERED="True" 13 | 14 | LOG="../logs/train_baseline_scratch_celeba_${model}_${last_low_rank}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 15 | exec &> >(tee -a "$LOG") 16 | echo Logging output to "$LOG" 17 | 18 | cd ../.. 19 | 20 | time ./tools/train_cls.py --gpu 0 \ 21 | --traindb celeba_train \ 22 | --valdb celeba_val \ 23 | --iters ${iters} \ 24 | --base_lr ${base_lr} \ 25 | --clip_gradients 20 \ 26 | --loss Sigmoid \ 27 | --model ${model} \ 28 | --last_low_rank ${last_low_rank} \ 29 | --use_svd \ 30 | --exp celeba_baseline_scratch_${model}_${last_low_rank} \ 31 | --max_rounds 1 \ 32 | --stepsize ${stepsize} \ 33 | --use_bn -------------------------------------------------------------------------------- /experiments/scripts/train_baseline_scratch_deepfashion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | 9 | set -x 10 | set -e 11 | 12 | export PYTHONUNBUFFERED="True" 13 | 14 | LOG="../logs/train_baseline_scratch_deepfashion_${model}_${last_low_rank}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 15 | exec &> >(tee -a "$LOG") 16 | echo Logging output to "$LOG" 17 | 18 | cd ../.. 19 | 20 | time ./tools/train_cls.py --gpu 0 \ 21 | --traindb deepfashion_train \ 22 | --valdb deepfashion_val \ 23 | --iters ${iters} \ 24 | --base_lr ${base_lr} \ 25 | --clip_gradients 20 \ 26 | --loss Sigmoid \ 27 | --model ${model} \ 28 | --last_low_rank ${last_low_rank} \ 29 | --use_svd \ 30 | --exp deepfashion_baseline_scratch_${model}_${last_low_rank} \ 31 | --max_rounds 1 \ 32 | --stepsize ${stepsize} \ 33 | --use_bn -------------------------------------------------------------------------------- /experiments/scripts/train_baseline_single_celeba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | cls_id=$6 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | LOG="../logs/train_baseline_celeba_${model}_${last_low_rank}_${cls_id}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 16 | exec &> >(tee -a "$LOG") 17 | echo Logging output to "$LOG" 18 | 19 | cd ../.. 20 | 21 | time ./tools/train_cls.py --gpu 0 \ 22 | --traindb celeba_train \ 23 | --valdb celeba_val \ 24 | --iters ${iters} \ 25 | --base_lr ${base_lr} \ 26 | --clip_gradients 20 \ 27 | --loss Sigmoid \ 28 | --model ${model} \ 29 | --last_low_rank ${last_low_rank} \ 30 | --cls_id [${cls_id}] \ 31 | --use_svd \ 32 | --exp celeba_baseline_${model}_${last_low_rank}_${cls_id} \ 33 | --max_rounds 1 \ 34 | --stepsize ${stepsize} \ 35 | --weights data/pretrained/gender.caffemodel \ 36 | --use_bn -------------------------------------------------------------------------------- /experiments/scripts/train_branch_celeba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | max_rounds=$6 9 | max_stall=$7 10 | split_thresh=$8 11 | 12 | set -x 13 | set -e 14 | 15 | export PYTHONUNBUFFERED="True" 16 | 17 | LOG="../logs/train_branch_celeba_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 18 | exec &> >(tee -a "$LOG") 19 | echo Logging output to "$LOG" 20 | 21 | cd ../.. 22 | 23 | time ./tools/train_cls.py --gpu 0 \ 24 | --traindb celeba_train \ 25 | --valdb celeba_val \ 26 | --iters ${iters} \ 27 | --base_lr ${base_lr} \ 28 | --clip_gradients 20 \ 29 | --loss Sigmoid \ 30 | --model ${model} \ 31 | --last_low_rank ${last_low_rank} \ 32 | --use_svd \ 33 | --exp celeba_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh} \ 34 | --max_rounds ${max_rounds} \ 35 | --stepsize ${stepsize} \ 36 | --weights data/pretrained/gender.caffemodel \ 37 | --share_basis \ 38 | --use_bn \ 39 | --max_stall ${max_stall} \ 40 | --split_thresh ${split_thresh} -------------------------------------------------------------------------------- /experiments/scripts/train_branch_deepfashion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | max_rounds=$6 9 | max_stall=$7 10 | split_thresh=$8 11 | 12 | set -x 13 | set -e 14 | 15 | export PYTHONUNBUFFERED="True" 16 | 17 | LOG="../logs/train_branch_deepfashion_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 18 | exec &> >(tee -a "$LOG") 19 | echo Logging output to "$LOG" 20 | 21 | cd ../.. 22 | 23 | time ./tools/train_cls.py --gpu 0 \ 24 | --traindb deepfashion_train \ 25 | --valdb deepfashion_val \ 26 | --iters ${iters} \ 27 | --base_lr ${base_lr} \ 28 | --clip_gradients 20 \ 29 | --loss Sigmoid \ 30 | --model ${model} \ 31 | --last_low_rank ${last_low_rank} \ 32 | --use_svd \ 33 | --exp deepfashion_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh} \ 34 | --max_rounds ${max_rounds} \ 35 | --stepsize ${stepsize} \ 36 | --weights data/pretrained/gender.caffemodel \ 37 | --share_basis \ 38 | --use_bn \ 39 | --max_stall ${max_stall} \ 40 | --split_thresh ${split_thresh} -------------------------------------------------------------------------------- /experiments/scripts/train_branch_person.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | stepsize=$2 5 | base_lr=$3 6 | model=$4 7 | last_low_rank=$5 8 | max_rounds=$6 9 | max_stall=$7 10 | split_thresh=$8 11 | 12 | set -x 13 | set -e 14 | 15 | export PYTHONUNBUFFERED="True" 16 | 17 | LOG="../logs/train_branch_person_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 18 | exec &> >(tee -a "$LOG") 19 | echo Logging output to "$LOG" 20 | 21 | cd ../.. 22 | 23 | time ./tools/load_person.py --gpu 0 \ 24 | --model_face output/celeba_baseline_lowvgg16_0/celeba_train/prototxt/test.prototxt \ 25 | --weights_face output/celeba_baseline_lowvgg16_0/celeba_train/deploy.caffemodel \ 26 | --model_clothes output/deepfashion_baseline_lowvgg16_0/deepfashion_train/prototxt/test.prototxt \ 27 | --weights_clothes output/deepfashion_baseline_lowvgg16_0/deepfashion_train/deploy.caffemodel \ 28 | --imdb_face person_clothes_train \ 29 | --imdb_clothes person_face_train 30 | 31 | time ./tools/load_person.py --gpu 0 \ 32 | --model_face output/celeba_baseline_lowvgg16_0/celeba_train/prototxt/test.prototxt \ 33 | --weights_face output/celeba_baseline_lowvgg16_0/celeba_train/deploy.caffemodel \ 34 | --model_clothes output/deepfashion_baseline_lowvgg16_0/deepfashion_train/prototxt/test.prototxt \ 35 | --weights_clothes output/deepfashion_baseline_lowvgg16_0/deepfashion_train/deploy.caffemodel \ 36 | --imdb_face person_clothes_val \ 37 | --imdb_clothes person_face_val 38 | 39 | time ./tools/train_cls.py --gpu 0 \ 40 | --traindb person_train \ 41 | --valdb person_val \ 42 | --iters ${iters} \ 43 | --base_lr ${base_lr} \ 44 | --clip_gradients 20 \ 45 | --loss Sigmoid \ 46 | --model ${model} \ 47 | --last_low_rank ${last_low_rank} \ 48 | --use_svd \ 49 | --exp person_${model}_${last_low_rank}_${max_rounds}_${max_stall}_${split_thresh} \ 50 | --max_rounds ${max_rounds} \ 51 | --stepsize ${stepsize} \ 52 | --weights data/pretrained/gender.caffemodel \ 53 | --share_basis \ 54 | --use_bn \ 55 | --max_stall ${max_stall} \ 56 | --split_thresh ${split_thresh} -------------------------------------------------------------------------------- /experiments/scripts/train_prototxt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | iters=$1 4 | solver=$2 5 | exp_postfix=$3 6 | 7 | if [ $# -eq 3 ] ; then 8 | class_id=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39] 9 | elif [ $# -eq 4 ] ; then 10 | class_id=$4 11 | fi 12 | 13 | set -x 14 | set -e 15 | 16 | export PYTHONUNBUFFERED="True" 17 | 18 | LOG="../logs/train_branch_celeba_${exp_postfix}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 19 | exec &> >(tee -a "$LOG") 20 | echo Logging output to "$LOG" 21 | 22 | cd ../.. 23 | 24 | time ./tools/train_cls.py --gpu 0 \ 25 | --traindb celeba_train \ 26 | --valdb celeba_val \ 27 | --iters ${iters} \ 28 | --solver ${solver} \ 29 | --exp celeba_solver_${exp_postfix} \ 30 | --share_basis \ 31 | --cls_id ${class_id} \ 32 | --use_bn -------------------------------------------------------------------------------- /lib/README.md: -------------------------------------------------------------------------------- 1 | datasets: Dataset Manipulation 2 | train: Training and Data Preprocessing 3 | test: Testing and Analysis 4 | utils: Common Utilities 5 | 6 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------- 2 | # Written by Yongxi Lu 3 | #----------------------------------------- 4 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi 2 | 3 | import datasets 4 | import factory 5 | 6 | 7 | -------------------------------------------------------------------------------- /lib/datasets/celeba.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | # import base class 4 | from imdb import Imdb 5 | import numpy as np 6 | import os 7 | import cPickle 8 | from utils.error import compute_mle 9 | 10 | """ 11 | Class to manipulate CelebA dataset 12 | """ 13 | 14 | class CelebA(Imdb): 15 | """ Image database for CelebA dataset. """ 16 | 17 | def __init__(self, split, align=False): 18 | name = 'celeba_'+split 19 | if align is True: 20 | name += '_align' 21 | 22 | Imdb.__init__(self, name) 23 | 24 | # attribute classes 25 | self._classes = \ 26 | ['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones', 'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie', 'Young'] 27 | 28 | # load image paths and annotations 29 | self._data_path = os.path.join(self.data_path, 'imdb_CelebA') 30 | self._load_dataset(split, align) 31 | 32 | def _load_dataset(self, split, align): 33 | """ Load image path list and ground truths """ 34 | 35 | # load image path list and ground truths from the cache 36 | cache_file = os.path.join(self.cache_path, self.name+'_dbcache.pkl') 37 | 38 | if os.path.exists(cache_file): 39 | with open(cache_file, 'rb') as fid: 40 | dbcache = cPickle.load(fid) 41 | print '{} database cache loaded from {}'.format(self.name, cache_file) 42 | self._image_list = dbcache['image_list'] 43 | self._gtdb = dbcache['gtdb'] 44 | return 45 | 46 | # load list of images 47 | self._image_list, self._index_list = self._do_load_filelist(split, align) 48 | 49 | # load attributes and landmarks data 50 | self._gtdb = {'attr': np.zeros((self.num_images, self.num_classes), dtype=np.bool), 'lm': np.zeros((self.num_images, 10), dtype=np.float32)} 51 | self._gtdb['attr'] = self._do_load_attributes() 52 | self._gtdb['lm'] = self._do_load_landmarks(align) 53 | 54 | dbcache = {'image_list': self.image_list, 'gtdb': self.gtdb} 55 | # save to cache 56 | with open(cache_file, 'wb') as fid: 57 | cPickle.dump(dbcache, fid, cPickle.HIGHEST_PROTOCOL) 58 | print 'wrote database cache to {}'.format(cache_file) 59 | 60 | def _do_load_filelist(self, split, align): 61 | """ Return the absolute paths to image files """ 62 | file = os.path.join(self.data_path, 'Eval', 'list_eval_partition.txt') 63 | 64 | # determine the matching id 65 | if split == 'train': 66 | sp_idx = ['0'] 67 | elif split == 'val': 68 | sp_idx = ['1'] 69 | elif split == 'test': 70 | sp_idx = ['2'] 71 | elif split == 'trainval': 72 | sp_idx = ['0', '1'] 73 | else: 74 | raise NameError('Undefined Data Split: {}'.format(split)) 75 | 76 | # determine image folder 77 | if align: 78 | basepath = os.path.join(self.data_path, 'Img', 'img_align_celeba') 79 | else: 80 | basepath = os.path.join(self.data_path, 'Img', 'img_celeba') 81 | 82 | # find paths to all files 83 | image_list = [] 84 | idx_list = [] 85 | idx = 0 86 | with open(file, 'r') as fid: 87 | for line in fid: 88 | split = line.split() 89 | if split[1] in sp_idx: 90 | image_list.append(os.path.join(basepath, split[0])) 91 | idx_list.append(idx) 92 | idx = idx + 1 93 | 94 | return image_list, idx_list 95 | 96 | def _do_load_attributes(self): 97 | """ Load attributes of the listed images. """ 98 | file = os.path.join(self.data_path, 'Anno', 'list_attr_celeba.txt') 99 | 100 | attr = np.zeros((self.num_images, self.num_classes), dtype=np.bool) 101 | 102 | base_idx = min(self._index_list) 103 | end_idx = max(self._index_list) 104 | 105 | with open(file, 'r') as fid: 106 | for _ in xrange(2+base_idx): # skip the first two+base_idx lines 107 | next(fid) 108 | 109 | idx = 0 110 | for line in fid: 111 | split = line.split() 112 | if idx <= end_idx-base_idx: 113 | attr[idx, :] = np.array(split[1:], dtype=np.float32) > 0 114 | idx = idx + 1 115 | 116 | return attr 117 | 118 | def _do_load_landmarks(self, align): 119 | """ Load landmarks of the litsed images. """ 120 | if align: 121 | file = os.path.join(self.data_path, 'Anno', 'list_landmarks_align_celeba.txt') 122 | else: 123 | file = os.path.join(self.data_path, 'Anno', 'list_landmarks_celeba.txt') 124 | 125 | lm = np.zeros((self.num_images, 10), dtype=np.float32) 126 | 127 | base_idx = min(self._index_list) 128 | end_idx = max(self._index_list) 129 | 130 | with open(file, 'r') as fid: 131 | for _ in xrange(2+base_idx): # skip the first two+base_idx lines 132 | next(fid) 133 | 134 | idx = 0 135 | for line in fid: 136 | split = line.split() 137 | if idx <= end_idx-base_idx: 138 | lm[idx, :] = np.array(split[1:], dtype=np.float32) 139 | idx = idx + 1 140 | 141 | return lm 142 | 143 | def evaluate(self, scores, ind, cls_idx=None): 144 | """ Evaluation: Report classification error rate. 145 | "scores" is a (N x C) matrix, where N is the number of samples, 146 | C is the number of classes. C=len(cls_idx) if provided. 147 | "ind" is an array that index into result 148 | """ 149 | if cls_idx is None: 150 | cls_idx = np.arange(self.num_classes) 151 | 152 | gt = self.gtdb['attr'][ind, :] 153 | gt = gt[:, cls_idx] 154 | err = compute_mle(scores, gt) 155 | 156 | return err 157 | 158 | def print_info(self, i): 159 | """ Output information about the image and some ground truth. """ 160 | 161 | im_size = self.image_size(i) 162 | print 'The path of the image is: {}'.format(self.image_path_at(i)) 163 | print 'width: {}, height: {}'.format(im_size[0], im_size[1]) 164 | 165 | attr_i = self.gtdb['attr'][i, :] 166 | lm_i = self.gtdb['lm'][i, :] 167 | 168 | print 'The attributes are: {}'.format(','.join([self._classes[i] for i in np.where(attr_i==1)[0]])) 169 | print 'The landmarks points are: {}'.format(lm_i) 170 | -------------------------------------------------------------------------------- /lib/datasets/celeba_plus_webcam_cls.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | # import base class 4 | from imdb import Imdb 5 | import numpy as np 6 | import os 7 | import os.path as osp 8 | import cPickle 9 | 10 | """ 11 | Class to manipulate CelebA dataset 12 | """ 13 | 14 | class CelebA_Plus_Webcam_Cls(Imdb): 15 | """ CelebA+Webcam data for 3-way classificatoin. """ 16 | 17 | def __init__(self, split): 18 | name = 'celeba_plus_webcam_cls_'+split 19 | Imdb.__init__(self, name) 20 | 21 | # object classes 22 | self._classes = ['Bald', 'Hat', 'Hair'] 23 | 24 | # load image paths and annotations 25 | self._data_path = osp.join(self.data_path, 'imdb_CelebA+Webcam') 26 | self._load_dataset(split) 27 | 28 | def _load_dataset(self, split): 29 | """ Load image path list and ground truths """ 30 | 31 | # load image path list and ground truths from the cache 32 | cache_file = osp.join(self.cache_path, self.name+'_dbcache.pkl') 33 | 34 | if osp.exists(cache_file): 35 | with open(cache_file, 'rb') as fid: 36 | dbcache = cPickle.load(fid) 37 | print '{} database cache loaded from {}'.format(self.name, cache_file) 38 | self._image_list = dbcache['image_list'] 39 | self._gtdb = dbcache['gtdb'] 40 | return 41 | 42 | # load list of images 43 | self._image_list = self._do_load_filelist(split) 44 | # load ground truth labels 45 | self._gtdb = {'label': self._do_load_labels()} 46 | 47 | dbcache = {'image_list': self.image_list, 'gtdb': self.gtdb} 48 | # save to cache 49 | with open(cache_file, 'wb') as fid: 50 | cPickle.dump(dbcache, fid, cPickle.HIGHEST_PROTOCOL) 51 | print 'wrote database cache to {}'.format(cache_file) 52 | 53 | def _do_load_filelist(self, split): 54 | """ Return the absolute paths to image files """ 55 | 56 | # images are already separated into folders called "TrainingData" and "ValidationData" 57 | if split == 'train': 58 | base_folder = osp.join(self.data_path, 'FinalHairHatBaldBalanced', 'TrainingData') 59 | elif split == 'val': 60 | base_folder = osp.join(self.data_path, 'FinalHairHatBaldBalanced', 'ValidationData') 61 | 62 | image_list = [] 63 | for cls_idx in xrange(self.num_classes): 64 | src_folder = osp.join(base_folder, self.classes[cls_idx]) 65 | image_list.extend([osp.join(src_folder, fn) for fn in os.listdir(src_folder)]) 66 | 67 | return image_list 68 | 69 | def _do_load_labels(self): 70 | """ Load labels of the listed images. 71 | Due to the structure of this dataset, we can find 72 | labels by looking at the folder of the file in the filelist 73 | """ 74 | labels = np.zeros((self.num_images), dtype=np.int64) 75 | for i in xrange(self.num_images): 76 | labels[i] = self.classes.index(osp.basename(osp.dirname(self.image_path_at(i)))) 77 | 78 | return labels 79 | 80 | def evaluate(self, scores, ind, cls_idx=None): 81 | """ Evaluation: Report classificaiton accuracy. 82 | The scores is a matrix, where each row is a sample point 83 | and the columns are scores for each class. 84 | A classfication is correct if the argmax in the scores are 85 | in fact correct label. 86 | """ 87 | gt = self.gtdb['label'][ind] 88 | if cls_idx is not None: 89 | gt = cls_idx[gt] 90 | pred = np.argmax(scores, axis=1) 91 | acc = float(gt == pred) / len(ind) 92 | 93 | return acc 94 | 95 | def print_info(self, i): 96 | """ Output information about the image and some ground truth. """ 97 | 98 | im_size = self.image_size(i) 99 | print 'The path of the image is: {}'.format(self.image_path_at(i)) 100 | print 'width: {}, height: {}'.format(im_size[0], im_size[1]) 101 | 102 | label_i = self.gtdb['label'][i] 103 | print 'The label is {}'.format(self.classes[label_i]) -------------------------------------------------------------------------------- /lib/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | """ Datasets """ 2 | 3 | import os.path as osp 4 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..') 5 | -------------------------------------------------------------------------------- /lib/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | # import base class 4 | from imdb import Imdb 5 | import numpy as np 6 | import os 7 | import cPickle 8 | from utils.error import compute_mle 9 | 10 | """Class to manipulate DeepFashion dataset """ 11 | 12 | class DeepFashion(Imdb): 13 | """ Image database for DeepFashion dataset. """ 14 | 15 | def __init__(self, split): 16 | name = 'deepfashion_'+split 17 | Imdb.__init__(self, name) 18 | 19 | # load image paths 20 | self._data_path = os.path.join(self.data_path, 'imdb_DeepFashion') 21 | 22 | # attribute classes 23 | self._classes = [] 24 | self._class_types = [] 25 | attr_file = os.path.join(self.data_path, 'Anno', 'list_category_cloth.txt') 26 | with open(attr_file, 'r') as fid: 27 | # skip first two lines 28 | next(fid) 29 | next(fid) 30 | # read class list 31 | for line in fid: 32 | parsed_line = line.split() 33 | self._classes.append(' '.join(parsed_line[:-1])) 34 | self._class_types.append(int(parsed_line[-1])) 35 | 36 | # load annotations 37 | self._load_dataset(split) 38 | 39 | def _load_dataset(self, split): 40 | """ Load image path list and ground truths """ 41 | 42 | # load image path list and ground truths from the cache 43 | cache_file = os.path.join(self.cache_path, self.name+'_dbcache.pkl') 44 | 45 | if os.path.exists(cache_file): 46 | with open(cache_file, 'rb') as fid: 47 | dbcache = cPickle.load(fid) 48 | print '{} database cache loaded from {}'.format(self.name, cache_file) 49 | self._image_list = dbcache['image_list'] 50 | self._gtdb = dbcache['gtdb'] 51 | return 52 | 53 | # load list of images 54 | self._image_list, self._index_list = self._do_load_filelist(split) 55 | 56 | # load attributes and landmarks data 57 | self._gtdb = {'attr': np.zeros((self.num_images, self.num_classes), dtype=np.bool)} 58 | self._gtdb['attr'] = self._do_load_attributes() 59 | 60 | dbcache = {'image_list': self.image_list, 'gtdb': self.gtdb} 61 | # save to cache 62 | with open(cache_file, 'wb') as fid: 63 | cPickle.dump(dbcache, fid, cPickle.HIGHEST_PROTOCOL) 64 | print 'wrote database cache to {}'.format(cache_file) 65 | 66 | def _do_load_filelist(self, split): 67 | """ Return the absolute paths to image files """ 68 | file = os.path.join(self.data_path, 'Eval', 'list_eval_partition.txt') 69 | # list of keywords associated with the desired partition 70 | if split == 'train': 71 | sp_idx = ['train'] 72 | elif split == 'val': 73 | sp_idx = ['val'] 74 | elif split == 'test': 75 | sp_idx = ['test'] 76 | elif split == 'trainval': 77 | sp_idx = ['train', 'val'] 78 | else: 79 | raise NameError('Undefined Data Split: {}'.format(split)) 80 | 81 | basepath = self.data_path 82 | 83 | # find paths to all files 84 | image_list = [] 85 | idx_list = [] 86 | idx = 0 87 | with open(file, 'r') as fid: 88 | # skip first two lines 89 | next(fid) 90 | next(fid) 91 | # parse the ground truth file 92 | for line in fid: 93 | split = line.split() 94 | if split[1] in sp_idx: 95 | image_list.append(os.path.join(basepath, split[0])) 96 | idx_list.append(idx) 97 | idx = idx + 1 98 | 99 | return image_list, idx_list 100 | 101 | def _do_load_attributes(self): 102 | """ Load attributes of the listed images. """ 103 | file = os.path.join(self.data_path, 'Anno', 'list_category_img.txt') 104 | 105 | attr = np.zeros((self.num_images, self.num_classes), dtype=np.bool) 106 | 107 | 108 | with open(file, 'r') as fid: 109 | # skip the first two lines 110 | next(fid) 111 | next(fid) 112 | # parse the ground truth file 113 | for line in fid: 114 | split = line.split() 115 | name_i = os.path.join(self.data_path, split[0]) 116 | idx = [i for i in xrange(self.num_images) if self._image_list[i]==name_i] 117 | if len(idx) > 0: 118 | cls_id = int(split[1]) - 1 119 | attr[idx[0], cls_id] = 1.0 120 | # attr[idx[0], :] = np.array(split[1:], dtype=np.float32) > 0 121 | return attr 122 | 123 | def evaluate(self, scores, ind, cls_idx=None): 124 | """ Evaluation: Report classification error rate. 125 | "scores" is a (N x C) matrix, where N is the number of samples, 126 | C is the number of classes. C=len(cls_idx) if provided. 127 | "ind" is an array that index into result 128 | """ 129 | if cls_idx is None: 130 | cls_idx = np.arange(self.num_classes) 131 | 132 | gt = self.gtdb['attr'][ind, :] 133 | gt = gt[:, cls_idx] 134 | err = compute_mle(scores, gt) 135 | 136 | return err 137 | 138 | def print_info(self, i): 139 | """ Output information about the image and some ground truth. """ 140 | 141 | im_size = self.image_size(i) 142 | print 'The path of the image is: {}'.format(self.image_path_at(i)) 143 | print 'width: {}, height: {}'.format(im_size[0], im_size[1]) 144 | 145 | attr_i = self.gtdb['attr'][i, :] 146 | print 'The attributes are: {}'.format(','.join([self._classes[i] for i in np.where(attr_i==1)[0]])) 147 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Written by Yongxi Lu 4 | 5 | """ 6 | Factory method for easily getting imdbs by name. 7 | """ 8 | 9 | # import utilities 10 | import sys 11 | import os.path as osp 12 | import numpy.random as npr 13 | 14 | if __name__ == '__main__': 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | this_dir = osp.dirname(__file__) 20 | # Add utils to PYTHONPATH 21 | lib_path = osp.join(this_dir, '..', '..', 'lib') 22 | add_path(lib_path) 23 | dataset_path = osp.join(this_dir) 24 | add_path(dataset_path) 25 | 26 | # import datasets 27 | from celeba import CelebA 28 | from celeba_plus_webcam_cls import CelebA_Plus_Webcam_Cls 29 | from IBMattributes import IBMAttributes 30 | from deepfashion import DeepFashion 31 | from personattr import PersonAttributes 32 | 33 | # dataset functor 34 | __sets = {} 35 | 36 | # PersonAttributes dataset 37 | for split in ['train', 'val', 'trainval']: 38 | name = 'person_{}'.format(split) 39 | __sets[name] = (lambda split=split: 40 | PersonAttributes(split)) 41 | 42 | # PersonAttributes dataset (align) 43 | for split in ['train', 'val', 'trainval']: 44 | name = 'person_{}_align'.format(split) 45 | __sets[name] = (lambda split=split: 46 | PersonAttributes(split, align=True)) 47 | 48 | # PersonAttributes dataset (face partition) 49 | for split in ['train', 'val', 'trainval', 'test']: 50 | name = 'person_face_{}'.format(split) 51 | __sets[name] = (lambda split=split: 52 | PersonAttributes(split, partition='face')) 53 | 54 | # PersonAttributes dataset (face partition) 55 | for split in ['train', 'val', 'trainval', 'test']: 56 | name = 'person_face_{}_align'.format(split) 57 | __sets[name] = (lambda split=split: 58 | PersonAttributes(split, align=True, partition='face')) 59 | 60 | # PersonAttributes dataset (clothes partition) 61 | for split in ['train', 'val', 'trainval', 'test']: 62 | name = 'person_clothes_{}'.format(split) 63 | __sets[name] = (lambda split=split: 64 | PersonAttributes(split, partition='clothes')) 65 | 66 | # setup DeepFashion dataset 67 | for split in ['train', 'val', 'test', 'trainval']: 68 | name = 'deepfashion_{}'.format(split) 69 | __sets[name] = (lambda split=split: 70 | DeepFashion(split)) 71 | # setup CelebA dataset 72 | for split in ['train', 'val', 'test', 'trainval']: 73 | name = 'celeba_{}'.format(split) 74 | __sets[name] = (lambda split=split: 75 | CelebA(split)) 76 | 77 | # setup CelebA (aligned) dataset 78 | for split in ['train', 'val', 'test', 'trainval']: 79 | name = 'celeba_{}_align'.format(split) 80 | __sets[name] = (lambda split=split: 81 | CelebA(split, align=True)) 82 | 83 | # setup CelebA+Webcam dataset 84 | for split in ['train', 'val']: 85 | name = 'celeba_plus_webcam_cls_{}'.format(split) 86 | __sets[name] = (lambda split=split: 87 | CelebA_Plus_Webcam_Cls(split)) 88 | 89 | # setup IBMattributes dataset 90 | for split in ['train', 'val']: 91 | name = 'IBMattributes_{}'.format(split) 92 | __sets[name] = (lambda split=split: 93 | IBMAttributes(split)) 94 | 95 | def get_imdb(name): 96 | """ Get an imdb (image database) by name.""" 97 | if not __sets.has_key(name): 98 | raise KeyError('Unknown dataset: {}'.format(name)) 99 | return __sets[name]() 100 | 101 | def list_imdbs(): 102 | """ List all registred imdbs.""" 103 | return __sets.keys() 104 | 105 | 106 | if __name__ == '__main__': 107 | 108 | if len(sys.argv) != 2: 109 | print 'Usage: ./factory.py imdb-name' 110 | sys.exit(1) 111 | 112 | imdb = get_imdb(sys.argv[1]) 113 | 114 | # print out dataset name and confirm the number of classes is correct 115 | print 'dataset name: {}'.format(imdb.name) 116 | print 'number of classes {}'.format(imdb.num_classes) 117 | print 'number of images {}'.format(imdb.num_images) 118 | print 'cache path: {}'.format(imdb.cache_path) 119 | print 'data path: {}'.format(imdb.data_path) 120 | 121 | # check few random examples 122 | idx = npr.choice(imdb.num_images, size=5, replace=False) 123 | print 'Please check against the dataset to see if the following printed information is correct...' 124 | for i in idx: 125 | imdb.print_info(i) 126 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | import cv2 4 | import os.path as osp 5 | import os 6 | import datasets 7 | 8 | class Imdb(object): 9 | """ Image database.""" 10 | 11 | def __init__(self, name): 12 | self._name = name 13 | self._classes = [] 14 | self._image_list = [] 15 | self._gtdb = {} 16 | self._data_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data')) 17 | # Options specific to a particular datasets 18 | self.config = {} 19 | 20 | @property 21 | def name(self): 22 | return self._name 23 | 24 | @property 25 | def num_classes(self): 26 | return len(self._classes) 27 | 28 | @property 29 | def classes(self): 30 | return self._classes 31 | 32 | @property 33 | def gtdb(self): 34 | return self._gtdb 35 | 36 | @property 37 | def image_list(self): 38 | return self._image_list 39 | 40 | @property 41 | def num_images(self): 42 | return len(self._image_list) 43 | 44 | def image_size(self, i): 45 | """ (width, height) """ 46 | assert (i>=0 and i 0: 65 | self._gtdb['attr'][:celeba_num, self._face_class_idx] = self._face.gtdb['attr'] 66 | # load soft labels for clothes attributes on celeba 67 | if align: 68 | fn = osp.join(self.data_path, 'person_'+'face'+'_'+split+'_align.pkl') 69 | else: 70 | fn = osp.join(self.data_path, 'person_'+'face'+'_'+split+'.pkl') 71 | if osp.exists(fn): 72 | if partition == 'all': 73 | with open(fn, 'rb') as fid: 74 | labels = cPickle.load(fid) 75 | self._gtdb['attr'][:celeba_num, self._clothes_class_idx] = labels 76 | else: 77 | 'Dataset {}: Labels for clothes attributes on CelebA are not loaded, the partition is not "all"'.format(self.name) 78 | else: 79 | print 'Dataset {}: Labels for clothes attributes on CelebA are not available! Missing filename: {}. Did you forget to run load_person.py first?'.\ 80 | format(self.name, fn) 81 | 82 | # load labels for deepfashion images if they are included. 83 | if deepfashion_num > 0: 84 | self._gtdb['attr'][celeba_num:, self._clothes_class_idx] = self._clothes.gtdb['attr'] 85 | # load soft labels for face attributes on deepfashion 86 | fn = osp.join(self.data_path, 'person_'+'clothes'+'_'+split+'.pkl') 87 | if osp.exists(fn): 88 | if partition == 'all': 89 | with open(fn, 'rb') as fid: 90 | labels = cPickle.load(fid) 91 | self._gtdb['attr'][celeba_num:, self._face_class_idx] = labels 92 | else: 93 | 'Dataset {}: Labels for face attributes on Deepfashion are not loaded, the partition is not "all"'.format(self.name) 94 | else: 95 | print 'Dataset {}: Labels for face attributes on Deepfashion are not available! Missing filename: {}. Did you forget to run load_person.py first?'.\ 96 | format(self.name, fn) 97 | 98 | def evaluate(self, scores, ind, cls_idx=None): 99 | """ Evaluation: Report classification error rate. 100 | "scores" is a (N x C) matrix, where N is the number of samples, 101 | C is the number of classes. C=len(cls_idx) if provided. 102 | "ind" is an array that index into result 103 | """ 104 | if cls_idx is None: 105 | cls_idx = np.arange(self.num_classes) 106 | 107 | gt = self.gtdb['attr'][ind, :] 108 | gt = gt[:, cls_idx] 109 | err = compute_mle(scores, gt) 110 | 111 | return err 112 | 113 | def print_info(self, i): 114 | """ Output information about the image and some ground truth. """ 115 | 116 | im_size = self.image_size(i) 117 | print 'The path of the image is: {}'.format(self.image_path_at(i)) 118 | print 'width: {}, height: {}'.format(im_size[0], im_size[1]) 119 | 120 | attr_i = self.gtdb['attr'][i, :] 121 | print 'The attributes are: {}'.format(','.join([self._classes[i] for i in np.where(attr_i==1)[0]])) 122 | -------------------------------------------------------------------------------- /lib/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # --------------------------------- 2 | # Written by Yongxi Lu 3 | # Attribute Classificatoin 4 | # --------------------------------- 5 | 6 | """ Useful for multi-label classification problem 7 | (a.k.a attribute classification) 8 | """ -------------------------------------------------------------------------------- /lib/evaluation/cluster.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Experiment and visualize clustering of class labels """ 4 | 5 | import numpy as np 6 | from utils.blob import im_list_to_blob 7 | from utils.timer import Timer 8 | from utils.config import cfg 9 | from sklearn.cluster import SpectralClustering 10 | 11 | def _error2Aff(label): 12 | """ Given an binary label matrix, convert it to correlation matrix. 13 | label: N by C matrix in {0,1}. 14 | cm: C by C matrix in [-1,1]. 15 | """ 16 | # covert to {-1, 1} format 17 | label = 2.0*label-1.0 18 | cm = pairwise_kernels(label, metric=(lambda x, y: 19 | x.dot(y)/(norm(x,2)*norm(y,2)))) 20 | # N = label.shape[0] 21 | # cm = cm/N 22 | 23 | return (cm+1.0)/2.0 24 | 25 | def MultiLabel_ECM_cluster(net, k, imdb, cls_idx=None, reverse=False): 26 | """ Get Multi-label Label Correlation Matrix (LCM) """ 27 | # class list is an ordered list of class index (onto the given dataset) 28 | if cls_idx is None: 29 | cls_idx = np.arange(imdb.num_classes) 30 | num_classes = len(cls_idx) 31 | num_images = imdb.num_images 32 | 33 | # iterate over images, collect error vectors 34 | err = np.zeros((num_images, num_classes)) # in {0,1} format 35 | timer = Timer() 36 | for i in xrange(num_images): 37 | # prepare blobs 38 | label_name = "prob" 39 | fn = imdb.image_path_at(i) 40 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 41 | net.blobs['data'].reshape(*(data.shape)) 42 | # forward the network 43 | timer.tic() 44 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 45 | timer.toc() 46 | # get results 47 | scores = blobs_out[label_name] 48 | # evaluate the scores 49 | err[i,:] = imdb.evaluate(scores, np.array([i]), cls_idx) 50 | # print infos 51 | print 'Image {}/{} ::: speed: {:.3f}s /iter'.format(i, num_images, timer.average_time) 52 | 53 | # get error correlation matrix 54 | aff = _error2Aff(err) 55 | if reverse: 56 | aff = 1.0-aff 57 | # perform clustering 58 | return _clusterAffinity(aff, k, imdb, cls_idx) 59 | 60 | def _clusterAffinity(aff, k, imdb, cls_idx): 61 | """ Cluster error correlation matrix using spectral clustering into k cluster, 62 | show the class labels in each cluster. 63 | """ 64 | # clustering model 65 | spectral = SpectralClustering(n_clusters=k, 66 | eigen_solver='arpack', 67 | affinity="precomputed") 68 | print 'Performing clustering...' 69 | labels = spectral.fit_predict(aff) 70 | 71 | # print out all labels 72 | for i in xrange(k): 73 | find_idx = np.where(labels==i)[0] 74 | print 'The list of classes in cluster {}'.format(i) 75 | print [imdb.classes[id] for id in find_idx] 76 | print '--------------------------------------------' 77 | 78 | return labels 79 | 80 | if __name__ == '__main__': 81 | # TODO: debug code if necessary 82 | 83 | pass 84 | -------------------------------------------------------------------------------- /lib/evaluation/test.py: -------------------------------------------------------------------------------- 1 | # --------------------------------- 2 | # Written by Yongxi Lu 3 | # --------------------------------- 4 | 5 | "Test a on a given network and a given dataset" 6 | 7 | import numpy as np 8 | from utils.blob import im_list_to_blob 9 | from utils.timer import Timer 10 | from utils.config import cfg 11 | import cPickle 12 | 13 | import os.path as osp 14 | 15 | def test_cls_topk(net, imdb, cls_idx, k): 16 | """ Test a model on imdb and evaluate the top-k accuracy metric """ 17 | 18 | if cls_idx is None: 19 | cls_idx = np.arange(imdb.num_classes) 20 | else: 21 | cls_idx = np.array(cls_idx) 22 | 23 | num_classes = len(cls_idx) 24 | assert k<=num_classes, 'k={} should be less than or equal to num_classes={}'.\ 25 | format(k, num_classes) 26 | num_images = imdb.num_images 27 | 28 | # iterate over images, collect error vectors 29 | # only test attributes that are properly labelled 30 | val_ind = np.any(imdb.gtdb['attr'][:, cls_idx]>=0, axis=0) 31 | found = np.zeros((num_images, np.sum(val_ind))) # in {0,1} format 32 | timer = Timer() 33 | for i in xrange(num_images): 34 | # prepare blobs 35 | label_name = "prob" 36 | fn = imdb.image_path_at(i) 37 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 38 | net.blobs['data'].reshape(*(data.shape)) 39 | # forward the network 40 | timer.tic() 41 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 42 | timer.toc() 43 | # get results 44 | scores = blobs_out[label_name] 45 | # find recall of top-k attributes 46 | top_classes = np.argsort(-scores[:, val_ind])[0, :k] 47 | pos_classes = np.where(imdb.gtdb['attr'][i, cls_idx[val_ind]] == 1)[0] 48 | 49 | found_classes = [idx for idx in pos_classes if idx in top_classes] 50 | found[i, found_classes] = 1.0 51 | 52 | # print infos 53 | print 'Image {}/{} ::: speed: {:.3f}s per image.'.format(i, num_images, timer.average_time) 54 | 55 | # print out basic dataset information 56 | print '---------------------------------------------------------------' 57 | print '!!! Summary of results.' 58 | print '!!! Test model on the "{}" dataset'.format(imdb.name) 59 | print '!!! The dataset has {} images.'.format(imdb.num_images) 60 | print '!!! On average, there are {} active attribute classese per image'.format(np.mean(np.sum(imdb.gtdb['attr'][:, cls_idx[val_ind]], axis=1))) 61 | print '!!! The average run time is {} per image.'.format(timer.average_time) 62 | 63 | # get recall for each class 64 | class_names = imdb.classes 65 | num_pos = np.sum(imdb.gtdb['attr'][:, cls_idx[val_ind]], axis=0) 66 | recall = np.nansum(found, axis=0)/num_pos 67 | for i in xrange(np.sum(val_ind)): 68 | print '!!! Top {} recall rate for class {} is: {} ({} instances)'.\ 69 | format(k, class_names[cls_idx[val_ind][i]], recall[i], num_pos[i]) 70 | 71 | print '!!! The top-{} recall rate is {}.'.format(k, np.nanmean(recall)) 72 | print '---------------------------------------------------------------' 73 | 74 | # print top-k accuracy 75 | print '!!! The top-{} accuracy rate is {}.'.format(k, np.sum(np.any(found>0, axis=1), dtype=np.float64)/num_images) 76 | 77 | def test_cls_error(net, imdb, cls_idx): 78 | """ Test a model on imdb and evaluate the error rate. """ 79 | 80 | if cls_idx is None: 81 | cls_idx = np.arange(imdb.num_classes) 82 | else: 83 | cls_idx = np.array(cls_idx) 84 | 85 | num_classes = len(cls_idx) 86 | num_images = imdb.num_images 87 | 88 | # iterate over images, collect error vectors 89 | val_ind = np.any(imdb.gtdb['attr'][:, cls_idx]>=0, axis=0) 90 | err = np.zeros((num_images, np.sum(val_ind))) # in {0,1} format 91 | timer = Timer() 92 | for i in xrange(num_images): 93 | # prepare blobs 94 | label_name = "prob" 95 | fn = imdb.image_path_at(i) 96 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 97 | net.blobs['data'].reshape(*(data.shape)) 98 | # forward the network 99 | timer.tic() 100 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 101 | timer.toc() 102 | # get results 103 | scores = blobs_out[label_name] 104 | # evaluate the scores 105 | err[i,:] = imdb.evaluate(scores[:, val_ind], np.array([i]), cls_idx[val_ind]) 106 | # print infos 107 | print 'Image {}/{} ::: speed: {:.3f}s per image.'.format(i, num_images, timer.average_time) 108 | 109 | # print out basic dataset information 110 | print '---------------------------------------------------------------' 111 | print '!!! Summary of results.' 112 | print '!!! Test model on the "{}" dataset'.format(imdb.name) 113 | print '!!! The dataset has {} images.'.format(imdb.num_images) 114 | print '!!! The average run time is {} per image.'.format(timer.average_time) 115 | 116 | # get error for each class 117 | class_names = imdb.classes 118 | mean_err = np.nanmean(err, axis=0) 119 | for i in xrange(np.sum(val_ind)): 120 | print '!!! Error rate for class {} is: {}'.\ 121 | format(class_names[cls_idx[val_ind][i]], mean_err[i]) 122 | 123 | print '!!! The average error rate is {}.'.format(mean_err.mean()) 124 | print '---------------------------------------------------------------' 125 | 126 | def save_softlabels(net, image_list, score_file, labeler): 127 | """ Save the labels over a set of images of the given class to a file """ 128 | 129 | num_images = len(image_list) 130 | # iterate over images, collect error vectors 131 | scores = -1.0 * np.ones((num_images, ), dtype=np.float32) 132 | # decode labler 133 | score_name = labeler[1] 134 | score_idx = labeler[2] 135 | timer = Timer() 136 | for i in xrange(num_images): 137 | # prepare blobs 138 | fn = image_list[i] 139 | # print 'Image {}/{}: {}'.format(i, num_images, fn) 140 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 141 | net.blobs['data'].reshape(*(data.shape)) 142 | # forward the network 143 | timer.tic() 144 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 145 | timer.toc() 146 | # get results 147 | scores[i] = blobs_out[score_name][:, score_idx] 148 | # print infos 149 | print 'Image {}/{} ::: speed: {:.3f}s per image.'.format(i, num_images, timer.average_time) 150 | 151 | # print out basic dataset information 152 | print '---------------------------------------------------------------' 153 | with open(score_file, 'wb') as fid: 154 | cPickle.dump(scores, fid, cPickle.HIGHEST_PROTOCOL) 155 | print '!!! The scores are saved to {}.'.format(score_file) 156 | 157 | def eval_and_save(net, imdb, cls_idx): 158 | """ Evaluate the network, and save the scores to a given destination """ 159 | 160 | if cls_idx is None: 161 | cls_idx = np.arange(imdb.num_classes) 162 | 163 | num_classes = len(cls_idx) 164 | num_images = imdb.num_images 165 | 166 | # iterate over images, collect error vectors 167 | scores = np.zeros((num_images, num_classes)) # in {0,1} format 168 | timer = Timer() 169 | for i in xrange(num_images): 170 | # prepare blobs 171 | label_name = "prob" 172 | fn = imdb.image_path_at(i) 173 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 174 | net.blobs['data'].reshape(*(data.shape)) 175 | # forward the network 176 | timer.tic() 177 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 178 | timer.toc() 179 | # get results 180 | scores[i, cls_idx] = blobs_out[label_name] 181 | # print infos 182 | print 'Image {}/{} ::: speed: {:.3f}s per image.'.format(i, num_images, timer.average_time) 183 | 184 | # save scores as a pkl file 185 | score_fn = osp.join(imdb.data_path, imdb.name+'.pkl') 186 | with open(score_fn, 'wb') as fid: 187 | cPickle.dump(scores, fid, cPickle.HIGHEST_PROTOCOL) 188 | print '!!! The scores are saved to {}.'.format(score_fn) 189 | 190 | -------------------------------------------------------------------------------- /lib/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #--------------------------------- 2 | # Facial Attribute Classificaiton 3 | # Written by Yongxi Lu 4 | #--------------------------------- 5 | -------------------------------------------------------------------------------- /lib/layers/classification_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by Yongxi Lu 3 | # -------------------------------------------------------- 4 | 5 | """Base class for python layers that supplies inputs for classifications in training""" 6 | 7 | import caffe 8 | from utils.config import cfg 9 | from utils.blob import im_list_to_blob 10 | import numpy as np 11 | import yaml 12 | 13 | class ClassificationData(caffe.Layer): 14 | """Classification data layer.""" 15 | 16 | def _shuffle_img_inds(self): 17 | """Randomly permute the training images.""" 18 | self._perm = np.random.permutation(np.arange(self._imdb.num_images)) 19 | self._cur = 0 20 | 21 | def _get_next_minibatch_inds(self): 22 | """Return the image indices for the next imnibatch""" 23 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= self._imdb.num_images: 24 | self._shuffle_img_inds() 25 | 26 | db_inds = self._perm[self._cur:self._cur+cfg.TRAIN.IMS_PER_BATCH] 27 | self._cur += cfg.TRAIN.IMS_PER_BATCH 28 | return db_inds 29 | 30 | def _get_next_minibatch(self): 31 | """Get a set of minibatches as a blob""" 32 | db_inds = self._get_next_minibatch_inds() 33 | blobs = self._get_blobs_from_inds(db_inds) 34 | return blobs 35 | 36 | def _get_random_val_batch(self): 37 | """ Get a random batch of samples for validation 38 | Packed into a blob 39 | """ 40 | num_samples = min(self._imdb.num_images, 1) 41 | db_inds = np.random.choice(self._imdb.num_images, size=num_samples, replace=False) 42 | blobs = self._get_blobs_from_inds(db_inds) 43 | return blobs 44 | 45 | def _get_blobs_from_inds(self, inds): 46 | """ Prepare a blob of images given inds """ 47 | filelist = [self._imdb.image_path_at(i) for i in inds] 48 | im_blob = im_list_to_blob(filelist, cfg.PIXEL_MEANS, cfg.SCALE) 49 | 50 | label_gt = self._label_gt_from_inds(inds) 51 | blobs = {'data': im_blob, 'label': label_gt} 52 | return blobs 53 | 54 | def _label_gt_from_inds(self, inds): 55 | """ Get label gt from inds """ 56 | return NotImplementedError 57 | 58 | def set_imdb(self, imdb): 59 | """ Set imdb to be use by this layer """ 60 | self._imdb = imdb 61 | if self._stage == 'TRAIN': 62 | self._shuffle_img_inds() 63 | 64 | def set_classlist(self, class_list): 65 | """ Change the list of classes to test """ 66 | self._class_list = class_list 67 | assert len(self._class_list) == self._num_classes, \ 68 | 'Number of classes does not match class list: {} vs {}'.\ 69 | format(len(self._classlist), self._num_classes) 70 | 71 | def setup(self, bottom, top): 72 | """Setup the AttributeData.""" 73 | 74 | # parse the layer parameter string, which must be valid YAML 75 | layer_params = yaml.load(self.param_str) 76 | 77 | self._num_classes = layer_params['num_classes'] 78 | self._stage = layer_params['stage'] 79 | 80 | # load class list if provided, otherwise set it to default 81 | if ['class_list'] in layer_params.keys(): 82 | self._class_list = np.array(layer_params['class_list']) 83 | assert len(self._class_list) == self._num_classes, \ 84 | 'Number of classes does not match class list: {} vs {}'.\ 85 | format(len(self._classlist), self._num_classes) 86 | else: 87 | self._class_list = np.arange(self._num_classes) 88 | 89 | self._shape_output_maps(top) 90 | 91 | def _shape_output_maps(self, top): 92 | """ reshape the output maps """ 93 | return NotImplementedError 94 | 95 | @property 96 | def num_classes(self): 97 | return self._num_classes 98 | 99 | def forward(self, bottom, top): 100 | """Get blobs and copy them into this layer's top blob vector.""" 101 | 102 | if self._stage == 'TRAIN': 103 | blobs = self._get_next_minibatch() 104 | elif self._stage == 'VAL': 105 | blobs = self._get_random_val_batch() 106 | 107 | for blob_name, blob in blobs.iteritems(): 108 | top_ind = self._name_to_top_map[blob_name] 109 | # Reshape net's input blobs 110 | top[top_ind].reshape(*(blob.shape)) 111 | # Copy data into net's input blobs 112 | top[top_ind].data[...] = blob.astype(np.float32, copy=False) 113 | 114 | def backward(self, top, propagate_down, bottom): 115 | """This layer does not propagate gradients.""" 116 | pass 117 | 118 | def reshape(self, bottom, top): 119 | """Reshaping happens during the call to forward.""" 120 | pass 121 | -------------------------------------------------------------------------------- /lib/layers/multilabel_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by Yongxi Lu 3 | # -------------------------------------------------------- 4 | 5 | """A Python layer for multi-label classification input """ 6 | 7 | from utils.config import cfg 8 | from .classification_data import ClassificationData 9 | import numpy as np 10 | 11 | class MultiLabelData(ClassificationData): 12 | """Multilabel data layer.""" 13 | 14 | def _shape_output_maps(self, top): 15 | """ reshape the output maps """ 16 | 17 | self._name_to_top_map = { 18 | 'data': 0, 19 | 'label': 1} 20 | 21 | # data blob: holds a batch of N images, each with 3 channels 22 | # label blob: holds a batch of N labels, each with _num_classes labels 23 | if self._stage == 'TRAIN': 24 | top[0].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, cfg.SCALE, cfg.SCALE) 25 | top[1].reshape(cfg.TRAIN.IMS_PER_BATCH, self._num_classes) 26 | elif self._stage == 'VAL': 27 | top[0].reshape(1, 3, cfg.SCALE, cfg.SCALE) 28 | top[1].reshape(1, self._num_classes) 29 | 30 | def _label_gt_from_inds(self, inds): 31 | """ Get label gt from inds """ 32 | attr_all = self._imdb.gtdb['attr'][inds, :].astype(np.float32, copy=False) 33 | attr_gt = attr_all[:, self._class_list] 34 | 35 | return attr_gt -------------------------------------------------------------------------------- /lib/layers/multilabel_err.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by Yongxi Lu 3 | # -------------------------------------------------------- 4 | 5 | """A Python layer to comupte multi-label error """ 6 | 7 | import caffe 8 | import numpy as np 9 | import yaml 10 | 11 | # def _eval_soft_error(scores, targets): 12 | # """ Compute multi-label error """ 13 | # num_samples, num_classes = targets.shape 14 | # err = np.empty((num_samples, num_classes), dtype=np.float32) 15 | # err[:] = np.nan 16 | # for c in xrange(num_classes): 17 | # # negative label is reserved for "unknown", evaluation of those entries are skipped. 18 | # valid_ind = np.where(targets[:,c]>=0.0)[0] 19 | # if len(valid_ind>0): 20 | # # err[valid_ind, c] = (scores[valid_ind, [c]]>=0.5) != (targets[valid_ind, [c]]>=0.5) 21 | # # soft errors 22 | # pos_labels = np.where(targets[valid_ind, [c]]>=0.5)[0] 23 | # neg_labels = np.where(targets[valid_ind, [c]]<0.5)[0] 24 | # err[valid_ind[pos_labels], c] = 1.0 - scores[valid_ind[pos_labels], [c]] 25 | # err[valid_ind[neg_labels], c] = scores[valid_ind[neg_labels], [c]] 26 | # return err 27 | 28 | def _eval_soft_error(scores, targets): 29 | """ Compute multi-label error """ 30 | num_samples, num_classes = targets.shape 31 | targets[np.where(targets[:]<0.0)] = np.nan 32 | err = np.empty((num_samples, num_classes), dtype=np.float32) 33 | err[:] = np.nan 34 | 35 | pos_labels = np.where(targets>=0.5) 36 | neg_labels = np.where(targets<0.5) 37 | err[pos_labels] = 1.0 - scores[pos_labels] 38 | err[neg_labels] = scores[neg_labels] 39 | 40 | return err 41 | 42 | class MultiLabelErr(caffe.Layer): 43 | """Multi-label error.""" 44 | 45 | def setup(self, bottom, top): 46 | """Setup the layer.""" 47 | top[0].reshape(1, 1) 48 | 49 | def forward(self, bottom, top): 50 | """Compute multi-label error.""" 51 | 52 | scores = bottom[0].data 53 | targets = bottom[1].data 54 | err = _eval_soft_error(scores, targets) 55 | 56 | top[0].reshape(*(err.shape)) 57 | top[0].data[...] = err.astype(np.float32, copy=False) 58 | 59 | def backward(self, top, propagate_down, bottom): 60 | """This layer does not propagate gradients.""" 61 | pass 62 | 63 | def reshape(self, bottom, top): 64 | """Reshaping happens at setup.""" 65 | pass 66 | -------------------------------------------------------------------------------- /lib/layers/singlelabel_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Written by Yongxi Lu 3 | # -------------------------------------------------------- 4 | 5 | """A Python layer for single-label classification input """ 6 | 7 | from utils.config import cfg 8 | from .classification_data import ClassificationData 9 | import numpy as np 10 | 11 | class SingleLabelData(ClassificationData): 12 | """Single label data layer.""" 13 | 14 | def _shape_output_maps(self, top): 15 | """ reshape the output maps """ 16 | self._name_to_top_map = { 17 | 'data': 0, 18 | 'label': 1} 19 | 20 | # data blob: holds a batch of N images, each with 3 channels 21 | # label blob: holds a batch of labels 22 | if self._stage == 'TRAIN': 23 | top[0].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, cfg.SCALE, cfg.SCALE) 24 | top[1].reshape(cfg.TRAIN.IMS_PER_BATCH) 25 | elif self._stage == 'VAL': 26 | top[0].reshape(1, 3, cfg.SCALE, cfg.SCALE) 27 | top[1].reshape(1) 28 | 29 | def _label_gt_from_inds(self, inds): 30 | """ Get label gt from inds """ 31 | label = self._imdb.gtdb['label'][inds] 32 | class_list = np.array(self._class_list) 33 | indexed_label = class_list[label].astype(np.float32, copy=False) 34 | 35 | return indexed_label -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu -------------------------------------------------------------------------------- /lib/models/branch_5-layer/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "branch_5-layer" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | name: "conv1_1_1" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_1_1" 12 | param { 13 | name: "conv1_1_1_w" 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | name: "conv1_1_1_b" 19 | lr_mult: 2 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 96 24 | bias_term: true 25 | pad: 0 26 | kernel_size: 7 27 | stride: 2 28 | weight_filler { 29 | type: "msra" 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "relu1_1_1" 39 | type: "ReLU" 40 | bottom: "conv1_1_1" 41 | top: "conv1_1_1" 42 | } 43 | layer { 44 | name: "norm1_1_1" 45 | type: "LRN" 46 | bottom: "conv1_1_1" 47 | top: "conv1_1_1" 48 | lrn_param { 49 | local_size: 5 50 | alpha: 0.0005 51 | beta: 0.75 52 | k: 2 53 | } 54 | } 55 | layer { 56 | name: "pool1_1_1" 57 | type: "Pooling" 58 | bottom: "conv1_1_1" 59 | top: "pool1_1_1" 60 | pooling_param { 61 | pool: MAX 62 | kernel_size: 3 63 | stride: 2 64 | pad: 0 65 | } 66 | } 67 | layer { 68 | name: "conv2_1_1" 69 | type: "Convolution" 70 | bottom: "pool1_1_1" 71 | top: "conv2_1_1" 72 | param { 73 | name: "conv2_1_1_w" 74 | lr_mult: 1 75 | decay_mult: 1 76 | } 77 | param { 78 | name: "conv2_1_1_b" 79 | lr_mult: 2 80 | decay_mult: 0 81 | } 82 | convolution_param { 83 | num_output: 256 84 | bias_term: true 85 | pad: 1 86 | kernel_size: 5 87 | stride: 2 88 | weight_filler { 89 | type: "msra" 90 | } 91 | bias_filler { 92 | type: "constant" 93 | value: 0 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2_1_1" 99 | type: "ReLU" 100 | bottom: "conv2_1_1" 101 | top: "conv2_1_1" 102 | } 103 | layer { 104 | name: "norm2_1_1" 105 | type: "LRN" 106 | bottom: "conv2_1_1" 107 | top: "conv2_1_1" 108 | lrn_param { 109 | local_size: 5 110 | alpha: 0.0005 111 | beta: 0.75 112 | k: 2 113 | } 114 | } 115 | layer { 116 | name: "pool2_1_1" 117 | type: "Pooling" 118 | bottom: "conv2_1_1" 119 | top: "pool2_1_1" 120 | pooling_param { 121 | pool: MAX 122 | kernel_size: 3 123 | stride: 2 124 | pad: 0 125 | } 126 | } 127 | layer { 128 | name: "conv3_1_1" 129 | type: "Convolution" 130 | bottom: "pool2_1_1" 131 | top: "conv3_1_1" 132 | param { 133 | name: "conv3_1_1_w" 134 | lr_mult: 1 135 | decay_mult: 1 136 | } 137 | param { 138 | name: "conv3_1_1_b" 139 | lr_mult: 2 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 512 144 | bias_term: true 145 | pad: 1 146 | kernel_size: 3 147 | stride: 1 148 | weight_filler { 149 | type: "msra" 150 | } 151 | bias_filler { 152 | type: "constant" 153 | value: 0 154 | } 155 | } 156 | } 157 | layer { 158 | name: "relu3_1_1" 159 | type: "ReLU" 160 | bottom: "conv3_1_1" 161 | top: "conv3_1_1" 162 | } 163 | layer { 164 | name: "conv4_1_1" 165 | type: "Convolution" 166 | bottom: "conv3_1_1" 167 | top: "conv4_1_1" 168 | param { 169 | name: "conv4_1_1_w" 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | name: "conv4_1_1_b" 175 | lr_mult: 2 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 512 180 | bias_term: true 181 | pad: 1 182 | kernel_size: 3 183 | stride: 1 184 | weight_filler { 185 | type: "msra" 186 | } 187 | bias_filler { 188 | type: "constant" 189 | value: 0 190 | } 191 | } 192 | } 193 | layer { 194 | name: "relu4_1_1" 195 | type: "ReLU" 196 | bottom: "conv4_1_1" 197 | top: "conv4_1_1" 198 | } 199 | layer { 200 | name: "conv5_1_1" 201 | type: "Convolution" 202 | bottom: "conv4_1_1" 203 | top: "conv5_1_1" 204 | param { 205 | name: "conv5_1_1_w" 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | name: "conv5_1_1_b" 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | convolution_param { 215 | num_output: 512 216 | bias_term: true 217 | pad: 1 218 | kernel_size: 3 219 | stride: 1 220 | weight_filler { 221 | type: "msra" 222 | } 223 | bias_filler { 224 | type: "constant" 225 | value: 0 226 | } 227 | } 228 | } 229 | layer { 230 | name: "relu5_1_1" 231 | type: "ReLU" 232 | bottom: "conv5_1_1" 233 | top: "conv5_1_1" 234 | } 235 | layer { 236 | name: "pool5_1_1" 237 | type: "Pooling" 238 | bottom: "conv5_1_1" 239 | top: "pool5_1_1" 240 | pooling_param { 241 | pool: MAX 242 | kernel_size: 3 243 | stride: 2 244 | pad: 0 245 | } 246 | } 247 | layer { 248 | name: "fc6_1_1" 249 | type: "InnerProduct" 250 | bottom: "pool5_1_1" 251 | top: "fc6_1_1" 252 | param { 253 | name: "fc6_1_1_w" 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | name: "fc6_1_1_b" 259 | lr_mult: 2 260 | decay_mult: 0 261 | } 262 | inner_product_param { 263 | num_output: 4096 264 | weight_filler { 265 | type: "msra" 266 | } 267 | bias_filler { 268 | type: "constant" 269 | value: 0 270 | } 271 | } 272 | } 273 | layer { 274 | name: "relu6_1_1" 275 | type: "ReLU" 276 | bottom: "fc6_1_1" 277 | top: "fc6_1_1" 278 | } 279 | layer { 280 | name: "drop6_1_1" 281 | type: "Dropout" 282 | bottom: "fc6_1_1" 283 | top: "fc6_1_1" 284 | dropout_param { 285 | dropout_ratio: 0.5 286 | } 287 | } 288 | layer { 289 | name: "fc7_1_1" 290 | type: "InnerProduct" 291 | bottom: "fc6_1_1" 292 | top: "fc7_1_1" 293 | param { 294 | name: "fc7_1_1_w" 295 | lr_mult: 1 296 | decay_mult: 1 297 | } 298 | param { 299 | name: "fc7_1_1_b" 300 | lr_mult: 2 301 | decay_mult: 0 302 | } 303 | inner_product_param { 304 | num_output: 4096 305 | weight_filler { 306 | type: "msra" 307 | } 308 | bias_filler { 309 | type: "constant" 310 | value: 0 311 | } 312 | } 313 | } 314 | layer { 315 | name: "relu7_1_1" 316 | type: "ReLU" 317 | bottom: "fc7_1_1" 318 | top: "fc7_1_1" 319 | } 320 | layer { 321 | name: "drop7_1_1" 322 | type: "Dropout" 323 | bottom: "fc7_1_1" 324 | top: "fc7_1_1" 325 | dropout_param { 326 | dropout_ratio: 0.5 327 | } 328 | } 329 | layer { 330 | name: "fc7_1_2" 331 | type: "InnerProduct" 332 | bottom: "fc6_1_1" 333 | top: "fc7_1_2" 334 | param { 335 | name: "fc7_1_2_w" 336 | lr_mult: 1 337 | decay_mult: 1 338 | } 339 | param { 340 | name: "fc7_1_2_b" 341 | lr_mult: 2 342 | decay_mult: 0 343 | } 344 | inner_product_param { 345 | num_output: 4096 346 | weight_filler { 347 | type: "msra" 348 | } 349 | bias_filler { 350 | type: "constant" 351 | value: 0 352 | } 353 | } 354 | } 355 | layer { 356 | name: "relu7_1_2" 357 | type: "ReLU" 358 | bottom: "fc7_1_2" 359 | top: "fc7_1_2" 360 | } 361 | layer { 362 | name: "drop7_1_2" 363 | type: "Dropout" 364 | bottom: "fc7_1_2" 365 | top: "fc7_1_2" 366 | dropout_param { 367 | dropout_ratio: 0.5 368 | } 369 | } 370 | layer { 371 | name: "score_fc1_1" 372 | type: "InnerProduct" 373 | bottom: "fc7_1_1" 374 | top: "score_fc1_1" 375 | param { 376 | name: "score_fc1_1_w" 377 | lr_mult: 1 378 | decay_mult: 1 379 | } 380 | param { 381 | name: "score_fc1_1_b" 382 | lr_mult: 2 383 | decay_mult: 0 384 | } 385 | inner_product_param { 386 | num_output: 1 387 | weight_filler { 388 | type: "msra" 389 | } 390 | bias_filler { 391 | type: "constant" 392 | value: 0 393 | } 394 | } 395 | } 396 | layer { 397 | name: "score_fc2_1" 398 | type: "InnerProduct" 399 | bottom: "fc7_1_2" 400 | top: "score_fc2_1" 401 | param { 402 | name: "score_fc2_1_w" 403 | lr_mult: 1 404 | decay_mult: 1 405 | } 406 | param { 407 | name: "score_fc2_1_b" 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | inner_product_param { 412 | num_output: 1 413 | weight_filler { 414 | type: "msra" 415 | } 416 | bias_filler { 417 | type: "constant" 418 | value: 0 419 | } 420 | } 421 | } 422 | layer { 423 | name: "score_fc2_2" 424 | type: "InnerProduct" 425 | bottom: "fc7_1_2" 426 | top: "score_fc2_2" 427 | param { 428 | name: "score_fc2_2_w" 429 | lr_mult: 1 430 | decay_mult: 1 431 | } 432 | param { 433 | name: "score_fc2_2_b" 434 | lr_mult: 2 435 | decay_mult: 0 436 | } 437 | inner_product_param { 438 | num_output: 1 439 | weight_filler { 440 | type: "msra" 441 | } 442 | bias_filler { 443 | type: "constant" 444 | value: 0 445 | } 446 | } 447 | } 448 | layer { 449 | name: "score_fc2_3" 450 | type: "InnerProduct" 451 | bottom: "fc7_1_2" 452 | top: "score_fc2_3" 453 | param { 454 | name: "score_fc2_3_w" 455 | lr_mult: 1 456 | decay_mult: 1 457 | } 458 | param { 459 | name: "score_fc2_3_b" 460 | lr_mult: 2 461 | decay_mult: 0 462 | } 463 | inner_product_param { 464 | num_output: 1 465 | weight_filler { 466 | type: "msra" 467 | } 468 | bias_filler { 469 | type: "constant" 470 | value: 0 471 | } 472 | } 473 | } 474 | layer { 475 | name: "score" 476 | type: "Concat" 477 | bottom: "score_fc1_1" 478 | bottom: "score_fc2_1" 479 | bottom: "score_fc2_2" 480 | bottom: "score_fc2_3" 481 | top: "score" 482 | concat_param { 483 | axis: 1 484 | } 485 | } 486 | layer { 487 | name: "prob" 488 | type: "Sigmoid" 489 | bottom: "score" 490 | top: "prob" 491 | } 492 | -------------------------------------------------------------------------------- /lib/models/default_5-layer/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "default_5-layer" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | name: "conv1_1_1" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1_1_1" 12 | param { 13 | name: "conv1_1_1_w" 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | name: "conv1_1_1_b" 19 | lr_mult: 2 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 96 24 | bias_term: true 25 | pad: 0 26 | kernel_size: 7 27 | stride: 2 28 | weight_filler { 29 | type: "msra" 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "relu1_1_1" 39 | type: "ReLU" 40 | bottom: "conv1_1_1" 41 | top: "conv1_1_1" 42 | } 43 | layer { 44 | name: "norm1_1_1" 45 | type: "LRN" 46 | bottom: "conv1_1_1" 47 | top: "conv1_1_1" 48 | lrn_param { 49 | local_size: 5 50 | alpha: 0.0005 51 | beta: 0.75 52 | k: 2 53 | } 54 | } 55 | layer { 56 | name: "pool1_1_1" 57 | type: "Pooling" 58 | bottom: "conv1_1_1" 59 | top: "pool1_1_1" 60 | pooling_param { 61 | pool: MAX 62 | kernel_size: 3 63 | stride: 2 64 | pad: 0 65 | } 66 | } 67 | layer { 68 | name: "conv2_1_1" 69 | type: "Convolution" 70 | bottom: "pool1_1_1" 71 | top: "conv2_1_1" 72 | param { 73 | name: "conv2_1_1_w" 74 | lr_mult: 1 75 | decay_mult: 1 76 | } 77 | param { 78 | name: "conv2_1_1_b" 79 | lr_mult: 2 80 | decay_mult: 0 81 | } 82 | convolution_param { 83 | num_output: 256 84 | bias_term: true 85 | pad: 1 86 | kernel_size: 5 87 | stride: 2 88 | weight_filler { 89 | type: "msra" 90 | } 91 | bias_filler { 92 | type: "constant" 93 | value: 0 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2_1_1" 99 | type: "ReLU" 100 | bottom: "conv2_1_1" 101 | top: "conv2_1_1" 102 | } 103 | layer { 104 | name: "norm2_1_1" 105 | type: "LRN" 106 | bottom: "conv2_1_1" 107 | top: "conv2_1_1" 108 | lrn_param { 109 | local_size: 5 110 | alpha: 0.0005 111 | beta: 0.75 112 | k: 2 113 | } 114 | } 115 | layer { 116 | name: "pool2_1_1" 117 | type: "Pooling" 118 | bottom: "conv2_1_1" 119 | top: "pool2_1_1" 120 | pooling_param { 121 | pool: MAX 122 | kernel_size: 3 123 | stride: 2 124 | pad: 0 125 | } 126 | } 127 | layer { 128 | name: "conv3_1_1" 129 | type: "Convolution" 130 | bottom: "pool2_1_1" 131 | top: "conv3_1_1" 132 | param { 133 | name: "conv3_1_1_w" 134 | lr_mult: 1 135 | decay_mult: 1 136 | } 137 | param { 138 | name: "conv3_1_1_b" 139 | lr_mult: 2 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 512 144 | bias_term: true 145 | pad: 1 146 | kernel_size: 3 147 | stride: 1 148 | weight_filler { 149 | type: "msra" 150 | } 151 | bias_filler { 152 | type: "constant" 153 | value: 0 154 | } 155 | } 156 | } 157 | layer { 158 | name: "relu3_1_1" 159 | type: "ReLU" 160 | bottom: "conv3_1_1" 161 | top: "conv3_1_1" 162 | } 163 | layer { 164 | name: "conv4_1_1" 165 | type: "Convolution" 166 | bottom: "conv3_1_1" 167 | top: "conv4_1_1" 168 | param { 169 | name: "conv4_1_1_w" 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | name: "conv4_1_1_b" 175 | lr_mult: 2 176 | decay_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 512 180 | bias_term: true 181 | pad: 1 182 | kernel_size: 3 183 | stride: 1 184 | weight_filler { 185 | type: "msra" 186 | } 187 | bias_filler { 188 | type: "constant" 189 | value: 0 190 | } 191 | } 192 | } 193 | layer { 194 | name: "relu4_1_1" 195 | type: "ReLU" 196 | bottom: "conv4_1_1" 197 | top: "conv4_1_1" 198 | } 199 | layer { 200 | name: "conv5_1_1" 201 | type: "Convolution" 202 | bottom: "conv4_1_1" 203 | top: "conv5_1_1" 204 | param { 205 | name: "conv5_1_1_w" 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | name: "conv5_1_1_b" 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | convolution_param { 215 | num_output: 512 216 | bias_term: true 217 | pad: 1 218 | kernel_size: 3 219 | stride: 1 220 | weight_filler { 221 | type: "msra" 222 | } 223 | bias_filler { 224 | type: "constant" 225 | value: 0 226 | } 227 | } 228 | } 229 | layer { 230 | name: "relu5_1_1" 231 | type: "ReLU" 232 | bottom: "conv5_1_1" 233 | top: "conv5_1_1" 234 | } 235 | layer { 236 | name: "pool5_1_1" 237 | type: "Pooling" 238 | bottom: "conv5_1_1" 239 | top: "pool5_1_1" 240 | pooling_param { 241 | pool: MAX 242 | kernel_size: 3 243 | stride: 2 244 | pad: 0 245 | } 246 | } 247 | layer { 248 | name: "fc6_1_1" 249 | type: "InnerProduct" 250 | bottom: "pool5_1_1" 251 | top: "fc6_1_1" 252 | param { 253 | name: "fc6_1_1_w" 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | name: "fc6_1_1_b" 259 | lr_mult: 2 260 | decay_mult: 0 261 | } 262 | inner_product_param { 263 | num_output: 4096 264 | weight_filler { 265 | type: "msra" 266 | } 267 | bias_filler { 268 | type: "constant" 269 | value: 0 270 | } 271 | } 272 | } 273 | layer { 274 | name: "relu6_1_1" 275 | type: "ReLU" 276 | bottom: "fc6_1_1" 277 | top: "fc6_1_1" 278 | } 279 | layer { 280 | name: "drop6_1_1" 281 | type: "Dropout" 282 | bottom: "fc6_1_1" 283 | top: "fc6_1_1" 284 | dropout_param { 285 | dropout_ratio: 0.5 286 | } 287 | } 288 | layer { 289 | name: "fc7_1_1" 290 | type: "InnerProduct" 291 | bottom: "fc6_1_1" 292 | top: "fc7_1_1" 293 | param { 294 | name: "fc7_1_1_w" 295 | lr_mult: 1 296 | decay_mult: 1 297 | } 298 | param { 299 | name: "fc7_1_1_b" 300 | lr_mult: 2 301 | decay_mult: 0 302 | } 303 | inner_product_param { 304 | num_output: 4096 305 | weight_filler { 306 | type: "msra" 307 | } 308 | bias_filler { 309 | type: "constant" 310 | value: 0 311 | } 312 | } 313 | } 314 | layer { 315 | name: "relu7_1_1" 316 | type: "ReLU" 317 | bottom: "fc7_1_1" 318 | top: "fc7_1_1" 319 | } 320 | layer { 321 | name: "drop7_1_1" 322 | type: "Dropout" 323 | bottom: "fc7_1_1" 324 | top: "fc7_1_1" 325 | dropout_param { 326 | dropout_ratio: 0.5 327 | } 328 | } 329 | layer { 330 | name: "score_fc1_1" 331 | type: "InnerProduct" 332 | bottom: "fc7_1_1" 333 | top: "score_fc1_1" 334 | param { 335 | name: "score_fc1_1_w" 336 | lr_mult: 1 337 | decay_mult: 1 338 | } 339 | param { 340 | name: "score_fc1_1_b" 341 | lr_mult: 2 342 | decay_mult: 0 343 | } 344 | inner_product_param { 345 | num_output: 1 346 | weight_filler { 347 | type: "msra" 348 | } 349 | bias_filler { 350 | type: "constant" 351 | value: 0 352 | } 353 | } 354 | } 355 | layer { 356 | name: "score_fc1_2" 357 | type: "InnerProduct" 358 | bottom: "fc7_1_1" 359 | top: "score_fc1_2" 360 | param { 361 | name: "score_fc1_2_w" 362 | lr_mult: 1 363 | decay_mult: 1 364 | } 365 | param { 366 | name: "score_fc1_2_b" 367 | lr_mult: 2 368 | decay_mult: 0 369 | } 370 | inner_product_param { 371 | num_output: 1 372 | weight_filler { 373 | type: "msra" 374 | } 375 | bias_filler { 376 | type: "constant" 377 | value: 0 378 | } 379 | } 380 | } 381 | layer { 382 | name: "score_fc1_3" 383 | type: "InnerProduct" 384 | bottom: "fc7_1_1" 385 | top: "score_fc1_3" 386 | param { 387 | name: "score_fc1_3_w" 388 | lr_mult: 1 389 | decay_mult: 1 390 | } 391 | param { 392 | name: "score_fc1_3_b" 393 | lr_mult: 2 394 | decay_mult: 0 395 | } 396 | inner_product_param { 397 | num_output: 1 398 | weight_filler { 399 | type: "msra" 400 | } 401 | bias_filler { 402 | type: "constant" 403 | value: 0 404 | } 405 | } 406 | } 407 | layer { 408 | name: "score_fc1_4" 409 | type: "InnerProduct" 410 | bottom: "fc7_1_1" 411 | top: "score_fc1_4" 412 | param { 413 | name: "score_fc1_4_w" 414 | lr_mult: 1 415 | decay_mult: 1 416 | } 417 | param { 418 | name: "score_fc1_4_b" 419 | lr_mult: 2 420 | decay_mult: 0 421 | } 422 | inner_product_param { 423 | num_output: 1 424 | weight_filler { 425 | type: "msra" 426 | } 427 | bias_filler { 428 | type: "constant" 429 | value: 0 430 | } 431 | } 432 | } 433 | layer { 434 | name: "score" 435 | type: "Concat" 436 | bottom: "score_fc1_1" 437 | bottom: "score_fc1_2" 438 | bottom: "score_fc1_3" 439 | bottom: "score_fc1_4" 440 | top: "score" 441 | concat_param { 442 | axis: 1 443 | } 444 | } 445 | layer { 446 | name: "prob" 447 | type: "Sigmoid" 448 | bottom: "score" 449 | top: "prob" 450 | } 451 | -------------------------------------------------------------------------------- /lib/models/default_5-layer/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "default_5-layer" 2 | layer { 3 | name: "data" 4 | type: "Python" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TEST 9 | } 10 | python_param { 11 | module: "layers.multilabel_data" 12 | layer: "MultiLabelData" 13 | param_str: "class_list: [0, 1, 2, 3]\nnum_classes: 4\nstage: VAL\n" 14 | } 15 | } 16 | layer { 17 | name: "data" 18 | type: "Python" 19 | top: "data" 20 | top: "label" 21 | include { 22 | phase: TRAIN 23 | } 24 | python_param { 25 | module: "layers.multilabel_data" 26 | layer: "MultiLabelData" 27 | param_str: "class_list: [0, 1, 2, 3]\nnum_classes: 4\nstage: TRAIN\n" 28 | } 29 | } 30 | layer { 31 | name: "conv1_1_1" 32 | type: "Convolution" 33 | bottom: "data" 34 | top: "conv1_1_1" 35 | param { 36 | name: "conv1_1_1_w" 37 | lr_mult: 1 38 | decay_mult: 1 39 | } 40 | param { 41 | name: "conv1_1_1_b" 42 | lr_mult: 2 43 | decay_mult: 0 44 | } 45 | convolution_param { 46 | num_output: 96 47 | bias_term: true 48 | pad: 0 49 | kernel_size: 7 50 | stride: 2 51 | weight_filler { 52 | type: "msra" 53 | } 54 | bias_filler { 55 | type: "constant" 56 | value: 0 57 | } 58 | } 59 | } 60 | layer { 61 | name: "relu1_1_1" 62 | type: "ReLU" 63 | bottom: "conv1_1_1" 64 | top: "conv1_1_1" 65 | } 66 | layer { 67 | name: "norm1_1_1" 68 | type: "LRN" 69 | bottom: "conv1_1_1" 70 | top: "conv1_1_1" 71 | lrn_param { 72 | local_size: 5 73 | alpha: 0.0005 74 | beta: 0.75 75 | k: 2 76 | } 77 | } 78 | layer { 79 | name: "pool1_1_1" 80 | type: "Pooling" 81 | bottom: "conv1_1_1" 82 | top: "pool1_1_1" 83 | pooling_param { 84 | pool: MAX 85 | kernel_size: 3 86 | stride: 2 87 | pad: 0 88 | } 89 | } 90 | layer { 91 | name: "conv2_1_1" 92 | type: "Convolution" 93 | bottom: "pool1_1_1" 94 | top: "conv2_1_1" 95 | param { 96 | name: "conv2_1_1_w" 97 | lr_mult: 1 98 | decay_mult: 1 99 | } 100 | param { 101 | name: "conv2_1_1_b" 102 | lr_mult: 2 103 | decay_mult: 0 104 | } 105 | convolution_param { 106 | num_output: 256 107 | bias_term: true 108 | pad: 1 109 | kernel_size: 5 110 | stride: 2 111 | weight_filler { 112 | type: "msra" 113 | } 114 | bias_filler { 115 | type: "constant" 116 | value: 0 117 | } 118 | } 119 | } 120 | layer { 121 | name: "relu2_1_1" 122 | type: "ReLU" 123 | bottom: "conv2_1_1" 124 | top: "conv2_1_1" 125 | } 126 | layer { 127 | name: "norm2_1_1" 128 | type: "LRN" 129 | bottom: "conv2_1_1" 130 | top: "conv2_1_1" 131 | lrn_param { 132 | local_size: 5 133 | alpha: 0.0005 134 | beta: 0.75 135 | k: 2 136 | } 137 | } 138 | layer { 139 | name: "pool2_1_1" 140 | type: "Pooling" 141 | bottom: "conv2_1_1" 142 | top: "pool2_1_1" 143 | pooling_param { 144 | pool: MAX 145 | kernel_size: 3 146 | stride: 2 147 | pad: 0 148 | } 149 | } 150 | layer { 151 | name: "conv3_1_1" 152 | type: "Convolution" 153 | bottom: "pool2_1_1" 154 | top: "conv3_1_1" 155 | param { 156 | name: "conv3_1_1_w" 157 | lr_mult: 1 158 | decay_mult: 1 159 | } 160 | param { 161 | name: "conv3_1_1_b" 162 | lr_mult: 2 163 | decay_mult: 0 164 | } 165 | convolution_param { 166 | num_output: 512 167 | bias_term: true 168 | pad: 1 169 | kernel_size: 3 170 | stride: 1 171 | weight_filler { 172 | type: "msra" 173 | } 174 | bias_filler { 175 | type: "constant" 176 | value: 0 177 | } 178 | } 179 | } 180 | layer { 181 | name: "relu3_1_1" 182 | type: "ReLU" 183 | bottom: "conv3_1_1" 184 | top: "conv3_1_1" 185 | } 186 | layer { 187 | name: "conv4_1_1" 188 | type: "Convolution" 189 | bottom: "conv3_1_1" 190 | top: "conv4_1_1" 191 | param { 192 | name: "conv4_1_1_w" 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | name: "conv4_1_1_b" 198 | lr_mult: 2 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 512 203 | bias_term: true 204 | pad: 1 205 | kernel_size: 3 206 | stride: 1 207 | weight_filler { 208 | type: "msra" 209 | } 210 | bias_filler { 211 | type: "constant" 212 | value: 0 213 | } 214 | } 215 | } 216 | layer { 217 | name: "relu4_1_1" 218 | type: "ReLU" 219 | bottom: "conv4_1_1" 220 | top: "conv4_1_1" 221 | } 222 | layer { 223 | name: "conv5_1_1" 224 | type: "Convolution" 225 | bottom: "conv4_1_1" 226 | top: "conv5_1_1" 227 | param { 228 | name: "conv5_1_1_w" 229 | lr_mult: 1 230 | decay_mult: 1 231 | } 232 | param { 233 | name: "conv5_1_1_b" 234 | lr_mult: 2 235 | decay_mult: 0 236 | } 237 | convolution_param { 238 | num_output: 512 239 | bias_term: true 240 | pad: 1 241 | kernel_size: 3 242 | stride: 1 243 | weight_filler { 244 | type: "msra" 245 | } 246 | bias_filler { 247 | type: "constant" 248 | value: 0 249 | } 250 | } 251 | } 252 | layer { 253 | name: "relu5_1_1" 254 | type: "ReLU" 255 | bottom: "conv5_1_1" 256 | top: "conv5_1_1" 257 | } 258 | layer { 259 | name: "pool5_1_1" 260 | type: "Pooling" 261 | bottom: "conv5_1_1" 262 | top: "pool5_1_1" 263 | pooling_param { 264 | pool: MAX 265 | kernel_size: 3 266 | stride: 2 267 | pad: 0 268 | } 269 | } 270 | layer { 271 | name: "fc6_1_1" 272 | type: "InnerProduct" 273 | bottom: "pool5_1_1" 274 | top: "fc6_1_1" 275 | param { 276 | name: "fc6_1_1_w" 277 | lr_mult: 1 278 | decay_mult: 1 279 | } 280 | param { 281 | name: "fc6_1_1_b" 282 | lr_mult: 2 283 | decay_mult: 0 284 | } 285 | inner_product_param { 286 | num_output: 4096 287 | weight_filler { 288 | type: "msra" 289 | } 290 | bias_filler { 291 | type: "constant" 292 | value: 0 293 | } 294 | } 295 | } 296 | layer { 297 | name: "relu6_1_1" 298 | type: "ReLU" 299 | bottom: "fc6_1_1" 300 | top: "fc6_1_1" 301 | } 302 | layer { 303 | name: "drop6_1_1" 304 | type: "Dropout" 305 | bottom: "fc6_1_1" 306 | top: "fc6_1_1" 307 | dropout_param { 308 | dropout_ratio: 0.5 309 | } 310 | } 311 | layer { 312 | name: "fc7_1_1" 313 | type: "InnerProduct" 314 | bottom: "fc6_1_1" 315 | top: "fc7_1_1" 316 | param { 317 | name: "fc7_1_1_w" 318 | lr_mult: 1 319 | decay_mult: 1 320 | } 321 | param { 322 | name: "fc7_1_1_b" 323 | lr_mult: 2 324 | decay_mult: 0 325 | } 326 | inner_product_param { 327 | num_output: 4096 328 | weight_filler { 329 | type: "msra" 330 | } 331 | bias_filler { 332 | type: "constant" 333 | value: 0 334 | } 335 | } 336 | } 337 | layer { 338 | name: "relu7_1_1" 339 | type: "ReLU" 340 | bottom: "fc7_1_1" 341 | top: "fc7_1_1" 342 | } 343 | layer { 344 | name: "drop7_1_1" 345 | type: "Dropout" 346 | bottom: "fc7_1_1" 347 | top: "fc7_1_1" 348 | dropout_param { 349 | dropout_ratio: 0.5 350 | } 351 | } 352 | layer { 353 | name: "score_fc1_1" 354 | type: "InnerProduct" 355 | bottom: "fc7_1_1" 356 | top: "score_fc1_1" 357 | param { 358 | name: "score_fc1_1_w" 359 | lr_mult: 1 360 | decay_mult: 1 361 | } 362 | param { 363 | name: "score_fc1_1_b" 364 | lr_mult: 2 365 | decay_mult: 0 366 | } 367 | inner_product_param { 368 | num_output: 1 369 | weight_filler { 370 | type: "msra" 371 | } 372 | bias_filler { 373 | type: "constant" 374 | value: 0 375 | } 376 | } 377 | } 378 | layer { 379 | name: "score_fc1_2" 380 | type: "InnerProduct" 381 | bottom: "fc7_1_1" 382 | top: "score_fc1_2" 383 | param { 384 | name: "score_fc1_2_w" 385 | lr_mult: 1 386 | decay_mult: 1 387 | } 388 | param { 389 | name: "score_fc1_2_b" 390 | lr_mult: 2 391 | decay_mult: 0 392 | } 393 | inner_product_param { 394 | num_output: 1 395 | weight_filler { 396 | type: "msra" 397 | } 398 | bias_filler { 399 | type: "constant" 400 | value: 0 401 | } 402 | } 403 | } 404 | layer { 405 | name: "score_fc1_3" 406 | type: "InnerProduct" 407 | bottom: "fc7_1_1" 408 | top: "score_fc1_3" 409 | param { 410 | name: "score_fc1_3_w" 411 | lr_mult: 1 412 | decay_mult: 1 413 | } 414 | param { 415 | name: "score_fc1_3_b" 416 | lr_mult: 2 417 | decay_mult: 0 418 | } 419 | inner_product_param { 420 | num_output: 1 421 | weight_filler { 422 | type: "msra" 423 | } 424 | bias_filler { 425 | type: "constant" 426 | value: 0 427 | } 428 | } 429 | } 430 | layer { 431 | name: "score_fc1_4" 432 | type: "InnerProduct" 433 | bottom: "fc7_1_1" 434 | top: "score_fc1_4" 435 | param { 436 | name: "score_fc1_4_w" 437 | lr_mult: 1 438 | decay_mult: 1 439 | } 440 | param { 441 | name: "score_fc1_4_b" 442 | lr_mult: 2 443 | decay_mult: 0 444 | } 445 | inner_product_param { 446 | num_output: 1 447 | weight_filler { 448 | type: "msra" 449 | } 450 | bias_filler { 451 | type: "constant" 452 | value: 0 453 | } 454 | } 455 | } 456 | layer { 457 | name: "score" 458 | type: "Concat" 459 | bottom: "score_fc1_1" 460 | bottom: "score_fc1_2" 461 | bottom: "score_fc1_3" 462 | bottom: "score_fc1_4" 463 | top: "score" 464 | concat_param { 465 | axis: 1 466 | } 467 | } 468 | layer { 469 | name: "prob" 470 | type: "Sigmoid" 471 | bottom: "score" 472 | top: "prob" 473 | } 474 | layer { 475 | name: "loss" 476 | type: "SigmoidCrossEntropyLoss" 477 | bottom: "score" 478 | bottom: "label" 479 | top: "loss" 480 | loss_weight: 1.0 481 | include { 482 | phase: TRAIN 483 | } 484 | } 485 | layer { 486 | name: "error" 487 | type: "Python" 488 | bottom: "prob" 489 | bottom: "label" 490 | top: "error" 491 | python_param { 492 | module: "layers.multilabel_err" 493 | layer: "MultiLabelErr" 494 | } 495 | } 496 | -------------------------------------------------------------------------------- /lib/models/layer_helpers.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Helper functions to add layers to the network. """ 4 | import caffe 5 | from caffe import layers as L 6 | from caffe import params as P 7 | from math import sqrt 8 | import yaml 9 | 10 | def get_init_params(std): 11 | if std == 'linear': 12 | weight_filler = {'type': 'xavier'} 13 | elif std == 'ReLu': 14 | weight_filler = {'type': 'msra'} 15 | else: 16 | weight_filler = {'type': 'gaussian', 'std': std} 17 | 18 | bias_filler = {'type': 'constant', 'value': 0} 19 | return weight_filler, bias_filler 20 | 21 | def add_conv(net, bottom, name, param_name, k, ks, pad, nout, lr_factor=1, std=0.01): 22 | """Add a convolutional layer """ 23 | # names of the parameters 24 | param = [{'name': param_name['weights'], 'lr_mult': lr_factor, 'decay_mult': 1}, 25 | {'name': param_name['bias'], 'lr_mult': 2*lr_factor, 'decay_mult': 0}] 26 | # weight filler 27 | weight_filler, bias_filler = get_init_params(std) 28 | # set up the layer 29 | net[name] = L.Convolution(bottom, param=param, convolution_param=dict(kernel_size=k, 30 | stride=ks, pad=pad, num_output=nout, bias_term=True, weight_filler=weight_filler, 31 | bias_filler=bias_filler)) 32 | 33 | def add_fc(net, bottom, name, param_name, nout, lr_factor=1, std=0.01): 34 | """Add a fully-connected layer """ 35 | param = [{'name': param_name['weights'], 'lr_mult': lr_factor, 'decay_mult': 1}, 36 | {'name': param_name['bias'], 'lr_mult': 2*lr_factor, 'decay_mult': 0}] 37 | # weight filler 38 | weight_filler, bias_filler = get_init_params(std) 39 | # set up the layer 40 | net[name] = L.InnerProduct(bottom, param=param, 41 | inner_product_param=dict(num_output=nout, weight_filler=weight_filler, 42 | bias_filler=bias_filler)) 43 | 44 | def add_relu(net, bottom, name, in_place=True): 45 | """Add ReLu activation """ 46 | net[name] = L.ReLU(bottom, in_place=in_place) 47 | 48 | def add_bn(net, bottom, name, in_place=True, ma_fraction=0.95): 49 | """ Add batch normalization layer """ 50 | batch_norm_param = dict(moving_average_fraction=ma_fraction) 51 | net[name] = L.BatchNorm(bottom, in_place=in_place, 52 | batch_norm_param=batch_norm_param) 53 | 54 | def add_scale(net, bottom, name, in_place=True, lr_factor=1): 55 | """ Add a scale layer """ 56 | param = [{'lr_mult': lr_factor, 'decay_mult': 1}, 57 | {'lr_mult': 2*lr_factor, 'decay_mult': 0}] 58 | net[name] = L.Scale(bottom, in_place=in_place, param=param, 59 | scale_param=dict(bias_term=True)) 60 | 61 | def add_maxpool(net, bottom, name, k, ks, pad): 62 | """Add max pooling layer """ 63 | net[name] = L.Pooling(bottom, kernel_size=k, stride=ks, pad=pad, 64 | pool=P.Pooling.MAX) 65 | 66 | def add_lrn(net, bottom, name, local_size, alpha, beta, k): 67 | """Add local response normalizaiton unit """ 68 | net[name] = L.LRN(bottom, local_size=local_size, 69 | alpha=alpha, beta=beta, k=k, in_place=True) 70 | 71 | def add_dropout(net, bottom, name, dropout_ratio=0.5, in_place=True): 72 | """ Add dropout layer """ 73 | net[name] = L.Dropout(bottom, dropout_ratio=dropout_ratio, in_place=in_place) 74 | 75 | def add_concat(net, bottom, name, axis): 76 | """ Add a concatenation layer along an axis """ 77 | net[name] = L.Concat(*bottom, axis=axis) 78 | 79 | def add_sigmoid(net, bottom, name, in_place=True): 80 | """Add Sigmoid activation """ 81 | net[name] = L.Sigmoid(bottom, in_place=in_place) 82 | 83 | def add_softmax(net, bottom, name, in_place=True): 84 | """Add Sigmoid activation """ 85 | net[name] = L.Softmax(bottom, in_place=in_place) 86 | 87 | def add_dummy_layer(net, name): 88 | """Add a dummy data layer """ 89 | net[name] = L.Layer() 90 | 91 | def add_multilabel_data_layer(net, name, phase, num_classes, class_list=None): 92 | """ Add a MultiLabelData layer """ 93 | include_dict = {'phase': phase} 94 | param = {'num_classes': num_classes} 95 | if phase == caffe.TRAIN: 96 | param['stage'] = 'TRAIN' 97 | elif phase == caffe.TEST: 98 | param['stage'] = 'VAL' 99 | if class_list is not None: 100 | assert len(class_list) == num_classes, \ 101 | 'Length of class list does not match number of classes {} vs {}'.\ 102 | format(len(class_list), num_classes) 103 | param['class_list'] = class_list 104 | 105 | param_str = yaml.dump(param) 106 | net[name[0]], net[name[1]] = L.Python(name=name[0], python_param=dict(module='layers.multilabel_data', 107 | layer='MultiLabelData', param_str=param_str), include=include_dict, ntop=2) 108 | 109 | def add_singlelabel_data_layer(net, name, phase, num_classes, class_list=None): 110 | """ Add a MultiLabelData layer """ 111 | include_dict = {'phase': phase} 112 | param = {'num_classes': num_classes} 113 | if phase == caffe.TRAIN: 114 | param['stage'] = 'TRAIN' 115 | elif phase == caffe.TEST: 116 | param['stage'] = 'VAL' 117 | if class_list is not None: 118 | assert len(class_list) == num_classes, \ 119 | 'Length of class list does not match number of classes {} vs {}'.\ 120 | format(len(class_list), num_classes) 121 | param['class_list'] = class_list 122 | 123 | param_str = yaml.dump(param) 124 | net[name[0]], net[name[1]] = L.Python(name=name[0], python_param=dict(module='layers.singlelabel_data', 125 | layer='SingleLabelData', param_str=param_str), include=include_dict, ntop=2) 126 | 127 | def add_multilabel_err_layer(net, bottom, name): 128 | """ Add a MultilabelErr layer """ 129 | net[name] = L.Python(bottom[0], bottom[1], 130 | python_param=dict(module='layers.multilabel_err', layer='MultiLabelErr')) 131 | 132 | def add_euclidean_loss(net, bottom, name, loss_weight, phase): 133 | """ Add Euclidean Loss """ 134 | include_dict = {'phase': phase} 135 | net[name] = L.EuclideanLoss(bottom[0], bottom[1], loss_weight=loss_weight, include=include_dict) 136 | 137 | def add_sigmoid_entropy_loss(net, bottom, name, loss_weight, phase): 138 | """ Add sigmoid entropy loss """ 139 | include_dict = {'phase': phase} 140 | net[name] = L.SigmoidCrossEntropyLoss(bottom[0], bottom[1], loss_weight=loss_weight, include=include_dict) 141 | 142 | def add_softmax_loss(net, bottom, name, loss_weight, phase): 143 | """ Add sigmoid entropy loss """ 144 | include_dict = {'phase': phase} 145 | net[name] = L.SoftmaxWithLoss(bottom[0], bottom[1], loss_weight=loss_weight, include=include_dict) 146 | 147 | def add_accuracy_layer(net, bottom, name): 148 | """ Add accuracy layer """ 149 | net[name] = L.Accuracy(bottom[0], bottom[1]) 150 | 151 | if __name__ == '__main__': 152 | net = caffe.NetSpec() 153 | net_test = caffe.NetSpec() 154 | add_multilabel_data_layer(net, name=['data', 'label'], phase=caffe.TRAIN, num_classes=2, class_list=[4,24]) 155 | param_name = {'weights': 'conv1_w', 'bias': 'conv1_b'} 156 | add_conv(net, bottom=net['data'], name='conv1', param_name=param_name, 157 | k=3, ks=1, pad=0, nout=128, lr_factor=1) 158 | add_relu(net, bottom=net['conv1'], name='relu1') 159 | param_name = {'weights': 'fc1_w', 'bias': 'fc1_b'} 160 | add_fc(net, bottom=net['relu1'], name='fc1-1', param_name=param_name, nout=128, lr_factor=1, std=0.01) 161 | add_fc(net, bottom=net['relu1'], name='fc1-2', param_name=param_name, nout=128, lr_factor=1, std=0.01) 162 | add_concat(net, bottom=[net['fc1-1'], net['fc1-2']], name='fc1', axis=1) 163 | add_sigmoid_entropy_loss(net, bottom=[net['data'],net['fc1']], name='loss', loss_weight=1.0, phase=caffe.TRAIN) 164 | add_multilabel_err_layer(net, bottom=[net['data'], net['fc1']], name='error') 165 | 166 | add_multilabel_data_layer(net_test, name=['data', 'label'], phase=caffe.TEST, num_classes=2, class_list=[4,24]) 167 | 168 | with open('function_test.prototxt', 'w') as f: 169 | f.write(str(net_test.to_proto())) 170 | f.write(str(net.to_proto())) -------------------------------------------------------------------------------- /lib/models/model_io.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Maintain I/O layers for interested neural net models """ 4 | 5 | import layer_helpers as lh 6 | import caffe 7 | 8 | class ModelIO(object): 9 | """Base class for an I/O model that are uesd in NetModel class """ 10 | 11 | def __init__(self): 12 | self._num_tasks = 0 13 | self._data_name = '' 14 | self._postfix = '' 15 | # name of the labels 16 | self._label_names = None 17 | 18 | @property 19 | def label_names(self): 20 | return self._label_names 21 | 22 | @property 23 | def num_tasks(self): 24 | return self._num_tasks 25 | 26 | @property 27 | def data_name(self): 28 | return self._data_name 29 | 30 | @property 31 | def postfix(self): 32 | return self._postfix 33 | 34 | def add_input(self, net, deploy=False): 35 | """ add input layers """ 36 | return NotImplementedError 37 | 38 | def add_output(self, net, bottom_dict, deploy=False, share_basis=False): 39 | """ add output layers """ 40 | return NotImplementedError 41 | 42 | def col_name_at_j(self, j): 43 | """ provide the name of column """ 44 | return NotImplementedError 45 | 46 | def branch_name_at_j_k(self, j, k): 47 | """ provide the name of a branch """ 48 | return NotImplementedError 49 | 50 | class ClassificationIO(ModelIO): 51 | """ IO base-class for grouping different outputs in the a classification problem as tasks""" 52 | 53 | def __init__(self, class_list, data_name, label_names, postfix, loss_layer): 54 | 55 | ModelIO.__init__(self) 56 | self._class_list = class_list 57 | self._num_tasks = len(class_list) 58 | self._data_name = data_name 59 | self._label_names = label_names 60 | self._postfix = postfix 61 | 62 | self._loss_layer = loss_layer 63 | 64 | @property 65 | def class_list(self): 66 | return self._class_list 67 | 68 | @property 69 | def loss_layer(self): 70 | return self._loss_layer 71 | 72 | def add_output(self, net, bottom_dict, num_filters=None, deploy=False, share_basis=False): 73 | """ add output layers """ 74 | # bottom_dict[k] is a tuple (num_tasks_at(i,k), bottom[k]) 75 | # this determines the number of fc layers needed. 76 | 77 | use_basis = (num_filters is not None) 78 | task_layer_list = [] 79 | for j in xrange(len(bottom_dict)): 80 | if use_basis: 81 | # basis_name = 'score_basis_{}'.format(j+1) + self.postfix 82 | # basis_name = 'score_basis' + self._post_fix_at(j) 83 | basis_name = self.col_name_at_j(j) 84 | if share_basis: 85 | blob_param_name = basis_name.split('_')[0] 86 | else: 87 | blob_param_name = basis_name 88 | param_names = {'weights': blob_param_name+'_w', 'bias': blob_param_name+'_b'} 89 | lh.add_fc(net, bottom=bottom_dict[j][1], name=basis_name, param_name=param_names, 90 | nout=num_filters, lr_factor=1, std='linear') 91 | bottom=net[basis_name] 92 | else: 93 | bottom=bottom_dict[j][1] 94 | 95 | # each task gets its own layer 96 | for k in xrange(bottom_dict[j][0]): 97 | # blob_name = 'score_fc' + self._post_fix_at(j, k) 98 | blob_name = self.branch_name_at_j_k(j,k) 99 | filter_names = {'weights': blob_name+'_w', 'bias': blob_name+'_b'} 100 | lh.add_fc(net, bottom=bottom, name=blob_name, param_name=filter_names, 101 | nout=1, lr_factor=1, std='ReLu') 102 | task_layer_list.append(net[blob_name]) 103 | 104 | self.add_loss(net, task_layer_list, deploy) 105 | 106 | def add_loss(self, net, task_layer_list, deploy): 107 | """ Add the loss layers """ 108 | return NotImplementedError 109 | 110 | def _post_fix_at(self, j, k=None): 111 | """ Output post fix for the names at column j, [branch k] 112 | Use 1-based indexing. 113 | """ 114 | if k is None: 115 | return '{}'.format(j+1) + self.postfix 116 | else: 117 | return '{}_{}'.format(j+1,k+1) + self.postfix 118 | 119 | def col_name_at_j(self, j): 120 | """ provide the name of column """ 121 | return "score_basis" + self._post_fix_at(j) 122 | 123 | def branch_name_at_j_k(self, j, k): 124 | """ provide the name of a branch """ 125 | return 'score_fc' + self._post_fix_at(j, k) 126 | 127 | class MultiLabelIO(ClassificationIO): 128 | """ IO for grouping different outputs in the a multi-label classification problem as tasks """ 129 | 130 | def __init__(self, class_list, data_name='data', label_names='label', postfix='', loss_layer='Sigmoid'): 131 | 132 | ClassificationIO.__init__(self, class_list, data_name, label_names, postfix, loss_layer) 133 | 134 | def add_input(self, net, deploy=False): 135 | """ add input layers """ 136 | class_list = self.class_list 137 | num_classes = len(class_list) 138 | 139 | if not deploy: 140 | train_net = net['train'] 141 | val_net = net['val'] 142 | lh.add_multilabel_data_layer(train_net, name=[self.data_name, self.label_names], 143 | phase=caffe.TRAIN, num_classes=num_classes, class_list=class_list) 144 | lh.add_multilabel_data_layer(val_net, name=[self.data_name, self.label_names], 145 | phase=caffe.TEST, num_classes=num_classes, class_list=class_list) 146 | 147 | def add_loss(self, net, task_layer_list, deploy): 148 | """ Add the loss layers """ 149 | # concatenate layers in the order specified by task_layer_list, compute the sigmoid 150 | lh.add_concat(net, bottom=task_layer_list, name='score'+self.postfix, axis=1) 151 | lh.add_sigmoid(net, bottom=net['score'+self.postfix], name='prob'+self.postfix, in_place=False) 152 | if not deploy: 153 | if self.loss_layer == 'Sigmoid': 154 | lh.add_sigmoid_entropy_loss(net, bottom=[net['score'+self.postfix], net[self.label_names]], 155 | name='loss'+self.postfix, loss_weight=1.0, phase=caffe.TRAIN) 156 | elif self.loss_layer == 'Square': 157 | lh.add_euclidean_loss(net, bottom=[net['prob'+self.postfix], net[self.label_names]], 158 | name='loss'+self.postfix, loss_weight=1.0, phase=caffe.TRAIN) 159 | else: 160 | print 'The layer type {} is not recognized!'.format(self.loss_layer) 161 | raise 162 | 163 | lh.add_multilabel_err_layer(net, bottom=[net['prob'+self.postfix], net[self.label_names]], 164 | name='error'+self.postfix) 165 | 166 | class SingleLabelIO(ClassificationIO): 167 | """ IO for grouping different outputs in the a single-label classification problem as tasks""" 168 | 169 | def __init__(self, class_list, data_name='data', label_names='label', postfix='', loss_layer='Softmax'): 170 | 171 | ClassificationIO.__init__(self, class_list, data_name, label_names, postfix, loss_layer) 172 | 173 | def add_input(self, net, deploy=False): 174 | """ add input layers """ 175 | class_list = self.class_list 176 | num_classes = len(class_list) 177 | 178 | if not deploy: 179 | train_net = net['train'] 180 | val_net = net['val'] 181 | lh.add_singlelabel_data_layer(train_net, name=[self.data_name, self.label_names], 182 | phase=caffe.TRAIN, num_classes=num_classes, class_list=class_list) 183 | lh.add_singlelabel_data_layer(val_net, name=[self.data_name, self.label_names], 184 | phase=caffe.TEST, num_classes=num_classes, class_list=class_list) 185 | 186 | def add_loss(self, net, task_layer_list, deploy): 187 | """ Add the loss layers """ 188 | # concatenate layers in the order specified by task_layer_list, compute the sigmoid 189 | lh.add_concat(net, bottom=task_layer_list, name='score'+self.postfix, axis=1) 190 | lh.add_softmax(net, bottom=net['score'+self.postfix], name='prob'+self.postfix, in_place=False) 191 | if not deploy: 192 | if self.loss_layer == 'Softmax': 193 | lh.add_softmax_loss(net, bottom=[net['score'+self.postfix], net[self.label_names]], 194 | name='loss'+self.postfix, loss_weight=1.0, phase=caffe.TRAIN) 195 | else: 196 | print 'The layer type {} is not recognized!'.format(self.loss_layer) 197 | raise 198 | 199 | lh.add_accuracy_layer(net, bottom=[net['prob'+self.postfix], net[self.label_names]], 200 | name='acc'+self.postfix) -------------------------------------------------------------------------------- /lib/models/modulo_row.py: -------------------------------------------------------------------------------- 1 | # Modified by Yongxi Lu 2 | # Based on source code provided by Abhishek Kumar 3 | 4 | import cvxpy as cvx 5 | from cvxpy import Variable, Minimize, Problem, sum_entries, CVXOPT, SCS 6 | import numpy as np 7 | import sys 8 | 9 | def l1dist_matrices_rowperm(A,B,tol): 10 | """ 11 | min_P ||A - PB||_1 s.t. P is a permutation matrix 12 | LP formulation: 13 | min 1'Z1 s.t. A-PB <= Z, A-PB>=-Z, P'1 = 1, 1'P = 1', P>=0 14 | """ 15 | 16 | m, n = A.shape 17 | 18 | Z = Variable(m,n) 19 | P = Variable(m,m) 20 | ones_m = np.ones((m,1)) 21 | 22 | #objective = Minimize(sum_entries(Z)) 23 | #constraints = [A-P*B <= Z, 24 | # P*B-A <= Z, 25 | # P >= 0, 26 | # P*ones_m == ones_m, 27 | # P.T * ones_m == ones_m] 28 | 29 | objective = Minimize(cvx.norm(A-P*B,1)) 30 | constraints = [P >= 0, P*ones_m == ones_m, P.T * ones_m == ones_m] 31 | 32 | prob = Problem(objective, constraints) 33 | prob.solve(verbose=True, solver=SCS, eps=tol) 34 | #prob.solve(verbose=True, solver=CVXOPT, abstol=1e-6) 35 | 36 | P = P.value 37 | dist = A - P.dot(B) 38 | return dist.sum(), P 39 | 40 | if __name__=='__main__': 41 | if len(sys.argv)<3: 42 | print 'Usage:', sys.argv[0], ' rowsize colsize' 43 | print 'Solves a problem of specified size with random matrices' 44 | sys.exit() 45 | 46 | m = int(sys.argv[1]) 47 | n = int(sys.argv[2]) 48 | A = np.random.rand(m,n) 49 | p = np.random.permutation(m) 50 | P = np.zeros((m,m)) #permutation matrix 51 | for i in range(m): 52 | P[i,p[i]] = 1 53 | B = P.dot(A) 54 | 55 | d, Q = l1dist_matrices_rowperm(A,B,1e-3) 56 | d_naive = np.abs(A - B) 57 | print 'L1 distance (naive):', d_naive.sum() 58 | print 'L1 distance (modulo row permutations):', d 59 | PQ = np.abs(P-Q) 60 | print PQ.sum() 61 | 62 | -------------------------------------------------------------------------------- /lib/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # --------------------------------- 2 | # Written by Yongxi Lu 3 | # Facial Attribute Classification 4 | # --------------------------------- 5 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | #------------------------------ 2 | # Written by Yongxi Lu 3 | #------------------------------ 4 | 5 | 6 | """ Blob helper functions. """ 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | def im_list_to_blob(filelist, pixel_means, scale): 12 | """ Load a lits of images, convert them into a network input""" 13 | 14 | # file list to image list 15 | imgs = [cv2.imread(fn) for fn in filelist] 16 | 17 | num_images = len(imgs) 18 | blob = np.zeros((num_images, scale, scale, 3), dtype=np.float32) 19 | for i in xrange(num_images): 20 | assert imgs[i] is not None, 'File {} is not loaded correctly'.format(filelist[i]) 21 | im = prep_im_for_blob(imgs[i], pixel_means, scale) 22 | blob[i, 0:scale, 0:scale, :] = im 23 | # permute channel to (batch_size, channel, height, width) 24 | channel_swap = (0, 3, 1, 2) 25 | blob = blob.transpose(channel_swap) 26 | return blob 27 | 28 | def prep_im_for_blob(im, pixel_means, scale): 29 | """ Mean subtract and scale an image """ 30 | 31 | im = im.astype(np.float32, copy=False) 32 | im -= pixel_means 33 | im = cv2.resize(im, dsize=(scale,scale), interpolation=cv2.INTER_LINEAR) 34 | 35 | return im 36 | -------------------------------------------------------------------------------- /lib/utils/config.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ 4 | Configuration files used in the system 5 | 6 | This file specifies default config options 7 | """ 8 | 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import cPickle 13 | # `pip install easydict` if package not found 14 | from easydict import EasyDict as edict 15 | 16 | 17 | __C = edict() 18 | # Consumers can get config by: 19 | # from utils import cfg 20 | cfg = __C 21 | 22 | # TODO: the controls here should be less significant to the algorithm. 23 | 24 | ## ------------------------------------------------------------------------------ 25 | # Options for training 26 | ## ------------------------------------------------------------------------------ 27 | __C.TRAIN = edict() 28 | 29 | # Iterations between snapshots 30 | __C.TRAIN.SNAPSHOT_ITERS = 1000 31 | 32 | # Perform validation or not 33 | __C.TRAIN.USE_VAL = True 34 | # validation size 35 | __C.TRAIN.VAL_SIZE = 100 36 | # Sample frequency for validation 37 | __C.TRAIN.VAL_FREQ = 500 38 | 39 | # Frequency to display timer 40 | __C.TRAIN.TIMER_FREQ = 500 41 | 42 | # Frequency to display training 43 | __C.TRAIN.TRAIN_FREQ = 20 44 | 45 | # Frequency to save error correlation 46 | __C.TRAIN.CORR_FREQ = 100 47 | 48 | # Cluster repetition 49 | __C.TRAIN.CLUSTER_REP = 5 50 | 51 | # Minibatch sizes 52 | __C.TRAIN.IMS_PER_BATCH = 32 53 | 54 | # Random noies factor for branches 55 | __C.TRAIN.NOISE_FACTOR = 0 56 | 57 | 58 | ## ------------------------------------------------------------------------------ 59 | 60 | ## ------------------------------------------------------------------------------ 61 | # Options used in testing 62 | ## ------------------------------------------------------------------------------ 63 | __C.TEST = edict() 64 | 65 | ## ------------------------------------------------------------------------------ 66 | 67 | # standard size uesd in training and testing 68 | __C.SCALE = 224 69 | 70 | # a useful small value 71 | __C.EPS = 1e-16 72 | 73 | # Pixel mean values (BGR order) as a (1, 1, 3) array 74 | # These are values originally used for training VGG16 75 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 76 | 77 | # For reproducibility 78 | __C.RNG_SEED = 3 79 | 80 | # Root directory of project 81 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 82 | 83 | def get_output_dir(imdb, net): 84 | """ Return the directory that stores experimental results """ 85 | 86 | path = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 87 | 88 | if net is None: 89 | return path 90 | else: 91 | return osp.join(path, net.name) 92 | 93 | def _merge_a_into_b(a, b): 94 | """ Merge config dictionary a into dictionary b, clobbering the 95 | options in b whenever they are also specified in a. 96 | """ 97 | 98 | if type(a) is not edict: 99 | return 100 | 101 | for k, v in a.iteritems(): 102 | # a must specify keys that are in b 103 | if not b.has_key(k): 104 | raise KeyError('{} is not a valid config key'.format(k)) 105 | 106 | # the types must match, too 107 | if type(b[k]) is not type(v): 108 | raise ValueError(('Type mismatch ({} vs. {}) ' 109 | 'for config key: {}').format(type(b[k]), 110 | type(v), k)) 111 | # recursively merge dicts 112 | if type(v) is edict: 113 | try: 114 | _merge_a_into_b(a[k], b[k]) 115 | except: 116 | print('Error under config key: {}'.format(k)) 117 | raise 118 | else: 119 | b[k] = v 120 | 121 | def cfg_from_file(filename): 122 | """Load a config file and merge it into the default options.""" 123 | import yaml 124 | 125 | with open(filename, 'r') as f: 126 | yaml_cfg = edict(yaml.load(f)) 127 | 128 | _merge_a_into_b(yaml_cfg, __C) 129 | 130 | def cfg_set_path(exp_dir): 131 | """ Set experiment paths """ 132 | if exp_dir is None: 133 | __C.EXP_DIR = 'default' 134 | else: 135 | __C.EXP_DIR = exp_dir 136 | 137 | def cfg_print_info(): 138 | 139 | def print_values(a, root): 140 | for k, v in a.iteritems(): 141 | if type(v) is edict: 142 | print_values(v, k) 143 | else: 144 | print '{}->{} is {}, its value is {}'.format(root, k, type(v), v) 145 | 146 | print_values(__C, 'cfg') 147 | 148 | if __name__ == '__main__': 149 | 150 | print 'The default of the dictionary' 151 | cfg_print_info() 152 | 153 | -------------------------------------------------------------------------------- /lib/utils/convertBN.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Transform a model that have BN layers, and subsume the normalization into the layers themselves.""" 4 | 5 | 6 | from caffe.proto import caffe_pb2 7 | import numpy as np 8 | import cPickle 9 | from scipy.io import savemat 10 | import os.path as osp 11 | from utils.config import cfg 12 | 13 | def convertBN(inmodel, outmodel): 14 | """ subsume all the BN layers inside inmode to normal layers in the out model """ 15 | 16 | # load files 17 | print 'Loading caffemodel: {}'.format(inmodel) 18 | with open(inmodel, 'rb') as f: 19 | binary_content = f.read() 20 | 21 | protobuf = caffe_pb2.NetParameter() 22 | protobuf.ParseFromString(binary_content) 23 | layers = protobuf.layer 24 | 25 | _eps = 1e-5 26 | for layer in layers: 27 | if layer.type == 'BatchNorm': 28 | # the layer to be modified. 29 | layer_c = [l for l in layers if l.name == layer.name[3:]][0] 30 | # the parameters fo the computational layer 31 | w = np.reshape(np.array(layer_c.blobs[0].data), layer_c.blobs[0].shape.dim) 32 | b = np.reshape(np.array(layer_c.blobs[1].data), layer_c.blobs[1].shape.dim) 33 | # load the BN parameters 34 | factor = 0 if np.array(layer.blobs[2].data) == 0 else 1./np.array(layer.blobs[2].data) 35 | mean = np.array(layer.blobs[0].data) * factor 36 | var = np.array(layer.blobs[1].data) * factor 37 | 38 | # display information 39 | print 'Modifying layer {} based on information from {}'.format(layer_c.name, layer.name) 40 | # update weights 41 | if len(w.shape) == 4: 42 | w /= (_eps + np.sqrt(var)[:, np.newaxis, np.newaxis, np.newaxis]) 43 | elif len(w.shape) == 2: 44 | w /= (_eps + np.sqrt(var)[:, np.newaxis]) 45 | # update bias 46 | b -= mean 47 | b /= (_eps + np.sqrt(var)) 48 | # save the changes back to the model 49 | del layer_c.blobs[0].data[:] 50 | del layer_c.blobs[1].data[:] 51 | layer_c.blobs[0].data.extend(w.flatten().tolist()) 52 | layer_c.blobs[1].data.extend(b.flatten().tolist()) 53 | 54 | # save the model to out model 55 | new_binary_content = protobuf.SerializeToString() 56 | 57 | print 'Saving caffemodel: {}'.format(outmodel) 58 | with open(outmodel, 'wb') as f: 59 | f.write(new_binary_content) -------------------------------------------------------------------------------- /lib/utils/error.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Utility functions used to compute error """ 4 | 5 | import numpy as np 6 | 7 | def compute_mle(scores, targets): 8 | """ Compute multi-label error """ 9 | num_classes = targets.shape[1] 10 | err = np.empty((num_classes,), dtype=np.float32) 11 | err[:] = np.nan 12 | for c in xrange(num_classes): 13 | # negative label is reserved for "unknown", evaluation of those entries are skipped. 14 | valid_ind = np.where(targets[:,c]>=0.0)[0] 15 | if len(valid_ind>0): 16 | err[c] = np.mean(((scores[valid_ind, [c]]>=0.5) != (targets[valid_ind, [c]]>=0.5)), axis=0) 17 | return err -------------------------------------------------------------------------------- /lib/utils/holder.py: -------------------------------------------------------------------------------- 1 | #------------------------------ 2 | # Written by Yongxi Lu 3 | #------------------------------ 4 | 5 | from collections import deque 6 | import numpy as np 7 | from utils.config import cfg 8 | 9 | class CircularQueue(object): 10 | """ Hold a queue with length """ 11 | 12 | def __init__(self, maxlen): 13 | self._holder = deque([], maxlen=maxlen) 14 | 15 | def append(self, value): 16 | self._holder.append(value) 17 | 18 | def toMatrix(self): 19 | return np.concatenate(self._holder, axis=0) -------------------------------------------------------------------------------- /lib/utils/log.py: -------------------------------------------------------------------------------- 1 | # ------------------------------- 2 | # Written by Yongxi Lu 3 | # ------------------------------- 4 | 5 | 6 | """ Utilities to interpret automatic logs generated by the training procedure 7 | """ 8 | 9 | # TODO: we dont need to plot now! 10 | # TODO: we need to save things to a file so that we can plot as much as we like! 11 | # Just getting everything out and saving it to a .mat file would be sufficient! 12 | 13 | import re 14 | import numpy as np 15 | 16 | # necessary when used without a display 17 | import matplotlib as mpl 18 | mpl.use('Agg') 19 | import matplotlib.pyplot as plt 20 | 21 | def parse_mle_and_plot(filename, splits, metric='error', output='loss.png', run_length=1, max_iters=None, epoch_size=None, max_y=None): 22 | """ parse the mle log for a particular split, output file at the output location """ 23 | 24 | # parse from files 25 | iters = {} 26 | err = {} 27 | for split in splits: 28 | iters[split] = [] 29 | err[split] = [] 30 | for fn in filename: 31 | it, v = parse_mle(fn, split, metric) 32 | iters[split].append(it) 33 | err[split].append(v) 34 | # take average 35 | iters[split] = iters[split][0] 36 | err[split] = np.mean(np.dstack(err[split]), axis=2) 37 | 38 | # if max_iters is specified, truncate loss 39 | if max_iters is not None: 40 | for split in splits: 41 | end = min(max_iters, np.max(iters[split])) 42 | iters[split] = iters[split][:end] 43 | err[split] = err[split][:end] 44 | 45 | # perform sampling and smoothing for the training split 46 | if 'training' in splits and 'validation' in splits: 47 | A = iters['training'] 48 | B = iters['validation'] 49 | # sample iterations 50 | center_inds = np.array([i for i in xrange(len(A)) if A[i] in B], dtype=np.int32) 51 | iters['training'] = A[center_inds] 52 | # sample errors 53 | begins = np.maximum(0, center_inds - run_length) 54 | ends = np.minimum(center_inds + run_length, len(A)) 55 | idx = np.vstack((begins, ends)).transpose() 56 | values = np.array([np.mean(err['training'][idx[i,0]:idx[i,1], :], axis=0) for i in xrange(len(center_inds))]) 57 | err['training'] = values 58 | 59 | # convert iterations to epochs if necessary 60 | if epoch_size is not None: 61 | for split in splits: 62 | iters[split] = iters[split].astype(np.float32, copy=False) 63 | iters[split] /= epoch_size 64 | 65 | plot_mle_mean(iters, err, output, max_y) 66 | 67 | def parse_mle(filename, split, metric): 68 | """ parse mle log for a particular split """ 69 | 70 | iters = [] 71 | err = [] 72 | pattern = 'Round [0-9]*, Iteration [0-9]*: {} {} = [0-9.\ ]*'.format(split, metric) 73 | with open(filename) as f: 74 | data = ' '.join([line.replace('\n', '') for line in f]) 75 | match = re.findall(pattern, data) 76 | for i in xrange(len(match)): 77 | parts = match[i].split(' = ') 78 | # match iterations 79 | iters.append(int(re.search('Iteration [0-9]+',parts[0]).group().split(' ')[1])) 80 | # match error rates 81 | err_str = re.search('[0-9.\ ]+', parts[1]).group().split() 82 | err.append([float(err_str[i]) for i in xrange(len(err_str))]) 83 | 84 | iters = np.array(iters) 85 | err = np.array(err) 86 | 87 | return iters, err 88 | 89 | def plot_mle_mean(iters, err, output="loss.png", max_y=None): 90 | """ Plot the evolution of multi-label error as a function of training iterations """ 91 | 92 | fig, ax = plt.subplots() 93 | ax.set_autoscale_on(False) 94 | max_value = 0 95 | max_iters = 0 96 | 97 | # split y axis [0, max], split x axis [0, max_iters] (if the provided max_iters is not integer, 98 | # e.g. it is the number of epochs, round it) 99 | for split, x in iters.iteritems(): 100 | ax.plot(x, np.mean(err[split], axis=1), label=split, linewidth=3.0) 101 | max_value = max(max_value, np.max(np.mean(err[split], axis=1))) 102 | max_iters = np.round(max(max_iters, np.max(iters[split]))).astype(np.int32) 103 | # if user sets maximum of y axis, use the user setting 104 | if max_y is not None: 105 | max_value = max_y 106 | 107 | ax.axis([0, max_iters, 0, max_value]) 108 | legend = ax.legend(loc='best', shadow=True) 109 | 110 | plt.xlabel('Number of iterations') 111 | plt.ylabel('Error rate') 112 | plt.title('Multi-label error as function of training iterations') 113 | # save the figure 114 | plt.savefig(output) 115 | 116 | if __name__ == '__main__': 117 | 118 | iters_train, err_train = parse_mle('test_parse.txt', 'training') 119 | iters_val, err_val = parse_mle('test_parse.txt', 'validation') 120 | 121 | print 'Parsing results for training' 122 | print iters_train 123 | print err_train 124 | 125 | print 'Parsing results for testing' 126 | print iters_val 127 | print err_val 128 | 129 | iters = {'training': iters_train, 'validation': iters_val} 130 | err = {'training': err_train, 'validation': err_val} 131 | 132 | plot_mle_mean(iters, err) 133 | -------------------------------------------------------------------------------- /lib/utils/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luyongxi/deep_share/52863842550244aaf6ecd1a978fa5b6f14206af3/lib/utils/loss.png -------------------------------------------------------------------------------- /lib/utils/somp.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Simultaneously orthogonal matching pursuit algorihtm 4 | We implement the naive form of this algorithm, as documented in: 5 | Slide 7 of http://users.cms.caltech.edu/~jtropp/slides/Tro05-Simultaneous-Sparsity-Talk.pdf 6 | 7 | As well as the efficient implementation based on Inverse Cholesky Factorization, as documented in 8 | http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6692175 9 | 10 | It is used to solve the following problem. 11 | 12 | Let S be a signal matrix, where each column is an observation. 13 | Let D be a dictionary matrix, where each column is a basis. 14 | 15 | We want to solve argmin |Y - TS|, where S is a sparse matrix with |S|_0_\infty <= K, which means 16 | each column has at most K non-zeros. In other words, no more than K columns of T are used as 17 | basis to approximate signal Y. 18 | """ 19 | 20 | import numpy as np 21 | import numpy.random as npr 22 | from numpy.linalg import norm 23 | from numpy.linalg import inv 24 | from numpy.linalg import lstsq 25 | 26 | from utils.config import cfg 27 | 28 | def somp_cholesky(Y, T, K, p=2): 29 | """ Inverse Cholesky implementation of SOMP algorihtm 30 | Input: 31 | Y: The signal matrix 32 | T: the dictionary matrix (each column is an atom) 33 | K: the cardinality of the basis 34 | p: use p-norm in finding the columns 35 | Output: 36 | w: the non-zero entries 37 | S: the linear combination 38 | """ 39 | 40 | # the initial residual is Y itself 41 | R = Y 42 | N = T.shape[1] 43 | F = Y.shape[1] 44 | I = [] 45 | # gram matrix 46 | G = np.dot(np.transpose(T), T) 47 | # norm of colums of T 48 | norm_T = norm(T, ord=2, axis=0) 49 | # initialize D and a 50 | a = np.zeros((0, F)) 51 | D = np.zeros((N, 0)) 52 | 53 | for k in xrange(K): 54 | # the remaining set of basis 55 | I_cmp = [i for i in range(N) if i not in I] 56 | if k == 0: 57 | gamma = np.dot(np.transpose(R), T) 58 | else: 59 | gamma = gamma - np.dot(Dk[:, np.newaxis], ak[np.newaxis, :]) 60 | obj = norm(gamma[:, I_cmp], ord=p, axis=0)/norm_T[I_cmp] 61 | # update the set of matching basis 62 | ik = I_cmp[np.argmax(obj)] 63 | I.append(ik) 64 | # iteration updates 65 | if k >= 1: 66 | w = D[ik, :] 67 | if G[ik, ik] - np.dot(w,w) <= 0: 68 | break 69 | lam = 1/(np.sqrt(G[ik, ik] - np.dot(w,w))) 70 | Dk = lam*(G[:,ik]-np.dot(D,w)) 71 | else: 72 | lam = 1/np.sqrt(G[ik, ik]) 73 | Dk = lam*(G[:, ik]) 74 | 75 | ak = lam*gamma[:, ik] 76 | a = np.vstack((a, ak)) 77 | D = np.hstack((D, Dk[:, np.newaxis])) 78 | 79 | if len(I) < K: 80 | I_rand = npr.choice(I_cmp, K-len(I), replace=False) 81 | I.extend(I_rand) 82 | 83 | return I 84 | 85 | def somp_naive(Y, T, K, p=2): 86 | """ Naive implementation of SOMP algorihtm 87 | Input: 88 | Y: The signal matrix 89 | T: the dictionary matrix (each column is an atom) 90 | K: the cardinality of the basis 91 | p: use p-norm in finding the columns 92 | Output: 93 | w: the non-zero entries 94 | S: the linear combination 95 | """ 96 | 97 | # the initial residual is Y itself 98 | R = Y 99 | N = T.shape[1] 100 | I = [] 101 | 102 | # norm of columns of T 103 | norm_T = norm(T, ord=2, axis=0) 104 | 105 | for _ in xrange(K): 106 | # the remaining set of basis 107 | I_cmp = [i for i in range(N) if i not in I] 108 | # project columns of R to columns of T, find the column in T that maximizes the 109 | # sum of the absolute values of inner product. 110 | # gamma(i,j) = /||t_j||, where the appropriate inner product is the dot product 111 | gamma = np.dot(np.transpose(R), T[:, I_cmp]) 112 | obj = norm(gamma, ord=p, axis=0)/norm_T[I_cmp] 113 | # update the set of matching basis 114 | ik = I_cmp[np.argmax(obj)] 115 | I.append(ik) 116 | # update residual by subtracting the projection 117 | S = lstsq(T[:,I], Y)[0] 118 | R = Y - np.dot(T[:, I], S) 119 | 120 | return I 121 | 122 | # def somp_pair(S1, S2, D1, D2, T): 123 | # """ Solve a pair-wise SOMP problem 124 | # Input: 125 | # S1, S2: Signal matrices 126 | # D1, D2: Dictionary matrices 127 | # T: the cardinality of the basis 128 | # Output: 129 | # A: the sparse linear combination 130 | # """ 131 | 132 | # # the initial residual is S itself 133 | # R1 = S1 134 | # R2 = S2 135 | 136 | # # Check if the number of columns of D1 and D2 are the same 137 | # assert D1.shape[0]==D2.shape[0], 'Mismatch in number of columns: {} vs {}.'.\ 138 | # format(D1.shape[0], D2.shape[0]) 139 | # d = D1.shape[1] 140 | # w = [] 141 | # for _ in xrange(T): 142 | # # the remaining set of basis 143 | # w_cmp = [i for i in range(d) if i not in w] 144 | # # project columns of R to columns of D, find the column in D that maximizes the 145 | # # sum of the absolute values of inner product. 146 | # # inner(i,j) = , where the appropriate inner product is the dot product 147 | # inner1 = np.dot(np.transpose(R1), D1[:, w_cmp]) 148 | # inner2 = np.dot(np.transpose(R2), D2[:, w_cmp]) 149 | # obj = np.abs(inner1).sum(axis=0) + np.abs(inner2).sum(axis=0) 150 | # # update the set of matching basis 151 | # w.append(w_cmp[np.argmax(obj)]) 152 | # # update residual by subtracting the projection 153 | # A1 = lstsq(D1[:,w], S1)[0] 154 | # A2 = lstsq(D2[:,w], S2)[0] 155 | # R1 = S - np.dot(D1[:, w], A1) 156 | # R2 = S - np.dot(D2[:, w], A2) 157 | 158 | # return w -------------------------------------------------------------------------------- /lib/utils/svd.py: -------------------------------------------------------------------------------- 1 | # Written by Yongxi Lu 2 | 3 | """ Truncated SVD used to initialize low-rank factorization """ 4 | 5 | import numpy as np 6 | from numpy.linalg import svd 7 | 8 | def truncated_svd(W, k): 9 | """ Given input filters, return a set of basis and the linear combination 10 | required to approximate the original input filters 11 | Input: 12 | W: [dxc] matrix, where c is the input dimension, 13 | d is the output dimension 14 | Output: 15 | B: [kxc] matrix, where c is the input dimension, 16 | k is the maximum rank of output filters 17 | L: [dxk] matrix, where k is the maximum rank of the 18 | output filters, d is the output dimension 19 | 20 | Note that k <= min(c,d). It is an error if that is encountered. 21 | """ 22 | d, c = W.shape 23 | assert k <= min(c,d), 'k={} is too large for c={}, d={}'.format(k,c,d) 24 | # S in this case is a vector with len=K=min(c,d), and U is [d x K], V is [K x c] 25 | u, s, v = svd(W, full_matrices=False) 26 | # compute square of s -> s_sqrt 27 | s_sqrt = np.sqrt(s[:k]) 28 | # extract L from u 29 | B = v[:k, :] * s_sqrt[:, np.newaxis] 30 | # extract B from v 31 | L = u[:, :k] * s_sqrt 32 | 33 | return B, L 34 | -------------------------------------------------------------------------------- /lib/utils/test_parse.txt: -------------------------------------------------------------------------------- 1 | Iteration 1: training error = [ 0.8125 0.59375 0.4375 0.375 0.53125 0.4375 0.40625 0.625 2 | 0.34375 0.34375 0.4375 0.46875 0.6875 0.625 0.40625 0.59375 3 | 0.65625 0.34375 0.5625 0.59375 0.28125 0.5 0.84375 0.4375 4 | 0.53125 0.3125 0.5 0.84375 0.625 0.46875 0.65625 0.25 0.25 5 | 0.71875 0.5 0.5 0.8125 0.5625 0.90625 0.5625 ] 6 | Iteration 2: training error = [ 0.4375 0.375 0.3125 0.09375 0.09375 0.09375 0.21875 0.28125 7 | 0.125 0.21875 0.09375 0.4375 0.21875 0.15625 0. 0.03125 8 | 0.15625 0.03125 0.375 0.4375 0.15625 0.53125 0.28125 0.125 9 | 0.34375 0.5 0. 0.5 0.125 0.09375 0.09375 0.46875 10 | 0.3125 0.3125 0.3125 0. 0.3125 0.21875 0.40625 0.21875] 11 | Iteration 3: training error = [ 0.1875 0.125 0.34375 0.25 0. 0.15625 0.15625 0.28125 12 | 0.40625 0.0625 0.03125 0.21875 0.28125 0.0625 0.0625 0.03125 13 | 0.15625 0.03125 0.15625 0.5 0.125 0.46875 0.0625 0.125 14 | 0.21875 0.4375 0.03125 0.28125 0.03125 0.03125 0.09375 0.65625 15 | 0.34375 0.28125 0.0625 0.09375 0.15625 0. 0.125 0.28125] 16 | Iteration 4: training error = [ 0.15625 0.09375 0.28125 0.1875 0.03125 0.125 0.21875 0.25 17 | 0.40625 0.125 0.03125 0.21875 0.15625 0.03125 0.03125 0. 0.125 18 | 0.03125 0.1875 0.28125 0.1875 0.46875 0.03125 0.25 0.15625 19 | 0.34375 0. 0.25 0.0625 0.0625 0.09375 0.375 0.25 20 | 0.1875 0.09375 0.09375 0.1875 0.0625 0. 0.1875 ] 21 | Iteration 2: validation error = [ 0.0875 0.2515625 0.2640625 0.209375 0.0203125 0.09140625 22 | 0.15390625 0.2046875 0.14921875 0.071875 0.0484375 0.21015625 23 | 0.11875 0.053125 0.03828125 0.06953125 0.0640625 0.03984375 24 | 0.14765625 0.15703125 0.04921875 0.16875 0.04453125 0.06328125 25 | 0.06796875 0.2703125 0.04375 0.2953125 0.07421875 0.0640625 26 | 0.05234375 0.13359375 0.20078125 0.20859375 0.16640625 0.0484375 27 | 0.1109375 0.13984375 0.05390625 0.18046875] 28 | Iteration 4: validation error = [ 0.0875 0.2515625 0.2640625 0.209375 0.0203125 0.09140625 29 | 0.15390625 0.2046875 0.14921875 0.071875 0.0484375 0.21015625 30 | 0.11875 0.053125 0.03828125 0.06953125 0.0640625 0.03984375 31 | 0.14765625 0.15703125 0.04921875 0.16875 0.04453125 0.06328125 32 | 0.06796875 0.2703125 0.04375 0.2953125 0.07421875 0.0640625 33 | 0.05234375 0.13359375 0.20078125 0.20859375 0.16640625 0.0484375 34 | 0.1109375 0.13984375 0.05390625 0.18046875] -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | -------------------------------------------------------------------------------- /models/hairhatbald/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/hairhatbald/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 20000 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | # caffe solver snapshotting is disabled 9 | snapshot: 0 10 | snapshot_prefix: "vgg16_hairhatbald" 11 | # shut-down caffe display 12 | display: 0 13 | # shut-down caffe validation 14 | test_iter: 0 15 | test_interval: 1000 16 | -------------------------------------------------------------------------------- /models/hairhatbald/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | input: "data" 3 | layer { 4 | name: "data" 5 | type: "Python" 6 | top: "data" 7 | top: "label" 8 | python_param { 9 | module: "layers.singlelabel_data" 10 | layer: "SingleLabelData" 11 | param_str: "{num_classes: 3, stage: TRAIN}" 12 | } 13 | include: { phase: TRAIN } 14 | } 15 | layer { 16 | name: "data" 17 | type: "Python" 18 | top: "data" 19 | top: "label" 20 | python_param { 21 | module: "layers.singlelabel_data" 22 | layer: "SingleLabelData" 23 | param_str: "{num_classes: 3, stage: VAL}" 24 | } 25 | include: { phase: TEST } 26 | } 27 | layer { 28 | bottom: "data" 29 | top: "conv1_1" 30 | name: "conv1_1" 31 | type: "Convolution" 32 | param { 33 | lr_mult: 1 34 | decay_mult: 1 35 | } 36 | param { 37 | lr_mult: 2 38 | decay_mult: 0 39 | } 40 | convolution_param { 41 | num_output: 64 42 | pad: 1 43 | kernel_size: 3 44 | } 45 | } 46 | layer { 47 | bottom: "conv1_1" 48 | top: "conv1_1" 49 | name: "relu1_1" 50 | type: "ReLU" 51 | } 52 | layer { 53 | bottom: "conv1_1" 54 | top: "conv1_2" 55 | name: "conv1_2" 56 | type: "Convolution" 57 | param { 58 | lr_mult: 1 59 | decay_mult: 1 60 | } 61 | param { 62 | lr_mult: 2 63 | decay_mult: 0 64 | } 65 | convolution_param { 66 | num_output: 64 67 | pad: 1 68 | kernel_size: 3 69 | } 70 | } 71 | layer { 72 | bottom: "conv1_2" 73 | top: "conv1_2" 74 | name: "relu1_2" 75 | type: "ReLU" 76 | } 77 | layer { 78 | bottom: "conv1_2" 79 | top: "pool1" 80 | name: "pool1" 81 | type: "Pooling" 82 | pooling_param { 83 | pool: MAX 84 | kernel_size: 2 85 | stride: 2 86 | } 87 | } 88 | layer { 89 | bottom: "pool1" 90 | top: "conv2_1" 91 | name: "conv2_1" 92 | type: "Convolution" 93 | param { 94 | lr_mult: 1 95 | decay_mult: 1 96 | } 97 | param { 98 | lr_mult: 2 99 | decay_mult: 0 100 | } 101 | convolution_param { 102 | num_output: 128 103 | pad: 1 104 | kernel_size: 3 105 | } 106 | } 107 | layer { 108 | bottom: "conv2_1" 109 | top: "conv2_1" 110 | name: "relu2_1" 111 | type: "ReLU" 112 | } 113 | layer { 114 | bottom: "conv2_1" 115 | top: "conv2_2" 116 | name: "conv2_2" 117 | type: "Convolution" 118 | param { 119 | lr_mult: 1 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 2 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 128 128 | pad: 1 129 | kernel_size: 3 130 | } 131 | } 132 | layer { 133 | bottom: "conv2_2" 134 | top: "conv2_2" 135 | name: "relu2_2" 136 | type: "ReLU" 137 | } 138 | layer { 139 | bottom: "conv2_2" 140 | top: "pool2" 141 | name: "pool2" 142 | type: "Pooling" 143 | pooling_param { 144 | pool: MAX 145 | kernel_size: 2 146 | stride: 2 147 | } 148 | } 149 | layer { 150 | bottom: "pool2" 151 | top: "conv3_1" 152 | name: "conv3_1" 153 | type: "Convolution" 154 | param { 155 | lr_mult: 1 156 | decay_mult: 1 157 | } 158 | param { 159 | lr_mult: 2 160 | decay_mult: 0 161 | } 162 | convolution_param { 163 | num_output: 256 164 | pad: 1 165 | kernel_size: 3 166 | } 167 | } 168 | layer { 169 | bottom: "conv3_1" 170 | top: "conv3_1" 171 | name: "relu3_1" 172 | type: "ReLU" 173 | } 174 | layer { 175 | bottom: "conv3_1" 176 | top: "conv3_2" 177 | name: "conv3_2" 178 | type: "Convolution" 179 | param { 180 | lr_mult: 1 181 | decay_mult: 1 182 | } 183 | param { 184 | lr_mult: 2 185 | decay_mult: 0 186 | } 187 | convolution_param { 188 | num_output: 256 189 | pad: 1 190 | kernel_size: 3 191 | } 192 | } 193 | layer { 194 | bottom: "conv3_2" 195 | top: "conv3_2" 196 | name: "relu3_2" 197 | type: "ReLU" 198 | } 199 | layer { 200 | bottom: "conv3_2" 201 | top: "conv3_3" 202 | name: "conv3_3" 203 | type: "Convolution" 204 | param { 205 | lr_mult: 1 206 | decay_mult: 1 207 | } 208 | param { 209 | lr_mult: 2 210 | decay_mult: 0 211 | } 212 | convolution_param { 213 | num_output: 256 214 | pad: 1 215 | kernel_size: 3 216 | } 217 | } 218 | layer { 219 | bottom: "conv3_3" 220 | top: "conv3_3" 221 | name: "relu3_3" 222 | type: "ReLU" 223 | } 224 | layer { 225 | bottom: "conv3_3" 226 | top: "pool3" 227 | name: "pool3" 228 | type: "Pooling" 229 | pooling_param { 230 | pool: MAX 231 | kernel_size: 2 232 | stride: 2 233 | } 234 | } 235 | layer { 236 | bottom: "pool3" 237 | top: "conv4_1" 238 | name: "conv4_1" 239 | type: "Convolution" 240 | param { 241 | lr_mult: 1 242 | decay_mult: 1 243 | } 244 | param { 245 | lr_mult: 2 246 | decay_mult: 0 247 | } 248 | convolution_param { 249 | num_output: 512 250 | pad: 1 251 | kernel_size: 3 252 | } 253 | } 254 | layer { 255 | bottom: "conv4_1" 256 | top: "conv4_1" 257 | name: "relu4_1" 258 | type: "ReLU" 259 | } 260 | layer { 261 | bottom: "conv4_1" 262 | top: "conv4_2" 263 | name: "conv4_2" 264 | type: "Convolution" 265 | param { 266 | lr_mult: 1 267 | decay_mult: 1 268 | } 269 | param { 270 | lr_mult: 2 271 | decay_mult: 0 272 | } 273 | convolution_param { 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | } 278 | } 279 | layer { 280 | bottom: "conv4_2" 281 | top: "conv4_2" 282 | name: "relu4_2" 283 | type: "ReLU" 284 | } 285 | layer { 286 | bottom: "conv4_2" 287 | top: "conv4_3" 288 | name: "conv4_3" 289 | param { 290 | lr_mult: 1 291 | decay_mult: 1 292 | } 293 | param { 294 | lr_mult: 2 295 | decay_mult: 0 296 | } 297 | type: "Convolution" 298 | convolution_param { 299 | num_output: 512 300 | pad: 1 301 | kernel_size: 3 302 | } 303 | } 304 | layer { 305 | bottom: "conv4_3" 306 | top: "conv4_3" 307 | name: "relu4_3" 308 | type: "ReLU" 309 | } 310 | layer { 311 | bottom: "conv4_3" 312 | top: "pool4" 313 | name: "pool4" 314 | type: "Pooling" 315 | pooling_param { 316 | pool: MAX 317 | kernel_size: 2 318 | stride: 2 319 | } 320 | } 321 | layer { 322 | bottom: "pool4" 323 | top: "conv5_1" 324 | name: "conv5_1" 325 | type: "Convolution" 326 | param { 327 | lr_mult: 1 328 | decay_mult: 1 329 | } 330 | param { 331 | lr_mult: 2 332 | decay_mult: 0 333 | } 334 | convolution_param { 335 | num_output: 512 336 | pad: 1 337 | kernel_size: 3 338 | } 339 | } 340 | layer { 341 | bottom: "conv5_1" 342 | top: "conv5_1" 343 | name: "relu5_1" 344 | type: "ReLU" 345 | } 346 | layer { 347 | bottom: "conv5_1" 348 | top: "conv5_2" 349 | name: "conv5_2" 350 | type: "Convolution" 351 | param { 352 | lr_mult: 1 353 | decay_mult: 1 354 | } 355 | param { 356 | lr_mult: 2 357 | decay_mult: 0 358 | } 359 | convolution_param { 360 | num_output: 512 361 | pad: 1 362 | kernel_size: 3 363 | } 364 | } 365 | layer { 366 | bottom: "conv5_2" 367 | top: "conv5_2" 368 | name: "relu5_2" 369 | type: "ReLU" 370 | } 371 | layer { 372 | bottom: "conv5_2" 373 | top: "conv5_3" 374 | name: "conv5_3" 375 | type: "Convolution" 376 | param { 377 | lr_mult: 1 378 | decay_mult: 1 379 | } 380 | param { 381 | lr_mult: 2 382 | decay_mult: 0 383 | } 384 | convolution_param { 385 | num_output: 512 386 | pad: 1 387 | kernel_size: 3 388 | } 389 | } 390 | layer { 391 | bottom: "conv5_3" 392 | top: "conv5_3" 393 | name: "relu5_3" 394 | type: "ReLU" 395 | } 396 | layer { 397 | bottom: "conv5_3" 398 | top: "pool5" 399 | name: "pool5" 400 | type: "Pooling" 401 | pooling_param { 402 | pool: MAX 403 | kernel_size: 2 404 | stride: 2 405 | } 406 | } 407 | layer { 408 | bottom: "pool5" 409 | top: "fc6" 410 | name: "fc6" 411 | param { 412 | lr_mult: 1 413 | decay_mult: 1 414 | } 415 | param { 416 | lr_mult: 2 417 | decay_mult: 0 418 | } 419 | type: "InnerProduct" 420 | inner_product_param { 421 | num_output: 4096 422 | } 423 | } 424 | layer { 425 | bottom: "fc6" 426 | top: "fc6" 427 | name: "relu6" 428 | type: "ReLU" 429 | } 430 | layer { 431 | bottom: "fc6" 432 | top: "fc6" 433 | name: "drop6" 434 | type: "Dropout" 435 | dropout_param { 436 | dropout_ratio: 0.5 437 | } 438 | } 439 | layer { 440 | bottom: "fc6" 441 | top: "fc7" 442 | name: "fc7" 443 | param { 444 | lr_mult: 1 445 | decay_mult: 1 446 | } 447 | param { 448 | lr_mult: 2 449 | decay_mult: 0 450 | } 451 | type: "InnerProduct" 452 | inner_product_param { 453 | num_output: 4096 454 | } 455 | } 456 | layer { 457 | bottom: "fc7" 458 | top: "fc7" 459 | name: "relu7" 460 | type: "ReLU" 461 | } 462 | layer { 463 | bottom: "fc7" 464 | top: "fc7" 465 | name: "drop7" 466 | type: "Dropout" 467 | dropout_param { 468 | dropout_ratio: 0.5 469 | } 470 | } 471 | layer { 472 | bottom: "fc7" 473 | top: "fc8-3" 474 | name: "fc8-3" 475 | param { 476 | lr_mult: 1 477 | decay_mult: 1 478 | } 479 | param { 480 | lr_mult: 2 481 | decay_mult: 0 482 | } 483 | type: "InnerProduct" 484 | inner_product_param { 485 | num_output: 3 486 | } 487 | } 488 | layer { 489 | bottom: "fc8-3" 490 | top: "prob" 491 | name: "prob" 492 | type: "Softmax" 493 | } 494 | layer { 495 | name: "loss" 496 | type: "SoftmaxWithLoss" 497 | bottom: "fc8-3" 498 | bottom: "label" 499 | top: "loss" 500 | loss_weight: 1 501 | include: { phase: TRAIN } 502 | } 503 | layer { 504 | name:"acc" 505 | type: "Accuracy" 506 | bottom: "prob" 507 | bottom: "label" 508 | top: "acc" 509 | } -------------------------------------------------------------------------------- /models/joint_entropy_loss/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/joint_entropy_loss/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 20000 6 | clip_gradients: 20 7 | momentum: 0.9 8 | weight_decay: 0.0005 9 | # caffe solver snapshotting is disabled 10 | snapshot: 0 11 | snapshot_prefix: "vgg16_att_cls_entropy_loss" 12 | # shut-down caffe display 13 | display: 0 14 | # shut-down caffe validation 15 | test_iter: 0 16 | test_interval: 1000 17 | -------------------------------------------------------------------------------- /models/joint_entropy_loss/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | bottom: "data" 9 | top: "conv1_1" 10 | name: "conv1_1" 11 | type: "Convolution" 12 | convolution_param { 13 | num_output: 64 14 | pad: 1 15 | kernel_size: 3 16 | } 17 | } 18 | layer { 19 | bottom: "conv1_1" 20 | top: "conv1_1" 21 | name: "relu1_1" 22 | type: "ReLU" 23 | } 24 | layer { 25 | bottom: "conv1_1" 26 | top: "conv1_2" 27 | name: "conv1_2" 28 | type: "Convolution" 29 | convolution_param { 30 | num_output: 64 31 | pad: 1 32 | kernel_size: 3 33 | } 34 | } 35 | layer { 36 | bottom: "conv1_2" 37 | top: "conv1_2" 38 | name: "relu1_2" 39 | type: "ReLU" 40 | } 41 | layer { 42 | bottom: "conv1_2" 43 | top: "pool1" 44 | name: "pool1" 45 | type: "Pooling" 46 | pooling_param { 47 | pool: MAX 48 | kernel_size: 2 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | bottom: "pool1" 54 | top: "conv2_1" 55 | name: "conv2_1" 56 | type: "Convolution" 57 | convolution_param { 58 | num_output: 128 59 | pad: 1 60 | kernel_size: 3 61 | } 62 | } 63 | layer { 64 | bottom: "conv2_1" 65 | top: "conv2_1" 66 | name: "relu2_1" 67 | type: "ReLU" 68 | } 69 | layer { 70 | bottom: "conv2_1" 71 | top: "conv2_2" 72 | name: "conv2_2" 73 | type: "Convolution" 74 | convolution_param { 75 | num_output: 128 76 | pad: 1 77 | kernel_size: 3 78 | } 79 | } 80 | layer { 81 | bottom: "conv2_2" 82 | top: "conv2_2" 83 | name: "relu2_2" 84 | type: "ReLU" 85 | } 86 | layer { 87 | bottom: "conv2_2" 88 | top: "pool2" 89 | name: "pool2" 90 | type: "Pooling" 91 | pooling_param { 92 | pool: MAX 93 | kernel_size: 2 94 | stride: 2 95 | } 96 | } 97 | layer { 98 | bottom: "pool2" 99 | top: "conv3_1" 100 | name: "conv3_1" 101 | type: "Convolution" 102 | convolution_param { 103 | num_output: 256 104 | pad: 1 105 | kernel_size: 3 106 | } 107 | } 108 | layer { 109 | bottom: "conv3_1" 110 | top: "conv3_1" 111 | name: "relu3_1" 112 | type: "ReLU" 113 | } 114 | layer { 115 | bottom: "conv3_1" 116 | top: "conv3_2" 117 | name: "conv3_2" 118 | type: "Convolution" 119 | convolution_param { 120 | num_output: 256 121 | pad: 1 122 | kernel_size: 3 123 | } 124 | } 125 | layer { 126 | bottom: "conv3_2" 127 | top: "conv3_2" 128 | name: "relu3_2" 129 | type: "ReLU" 130 | } 131 | layer { 132 | bottom: "conv3_2" 133 | top: "conv3_3" 134 | name: "conv3_3" 135 | type: "Convolution" 136 | convolution_param { 137 | num_output: 256 138 | pad: 1 139 | kernel_size: 3 140 | } 141 | } 142 | layer { 143 | bottom: "conv3_3" 144 | top: "conv3_3" 145 | name: "relu3_3" 146 | type: "ReLU" 147 | } 148 | layer { 149 | bottom: "conv3_3" 150 | top: "pool3" 151 | name: "pool3" 152 | type: "Pooling" 153 | pooling_param { 154 | pool: MAX 155 | kernel_size: 2 156 | stride: 2 157 | } 158 | } 159 | layer { 160 | bottom: "pool3" 161 | top: "conv4_1" 162 | name: "conv4_1" 163 | type: "Convolution" 164 | convolution_param { 165 | num_output: 512 166 | pad: 1 167 | kernel_size: 3 168 | } 169 | } 170 | layer { 171 | bottom: "conv4_1" 172 | top: "conv4_1" 173 | name: "relu4_1" 174 | type: "ReLU" 175 | } 176 | layer { 177 | bottom: "conv4_1" 178 | top: "conv4_2" 179 | name: "conv4_2" 180 | type: "Convolution" 181 | convolution_param { 182 | num_output: 512 183 | pad: 1 184 | kernel_size: 3 185 | } 186 | } 187 | layer { 188 | bottom: "conv4_2" 189 | top: "conv4_2" 190 | name: "relu4_2" 191 | type: "ReLU" 192 | } 193 | layer { 194 | bottom: "conv4_2" 195 | top: "conv4_3" 196 | name: "conv4_3" 197 | type: "Convolution" 198 | convolution_param { 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | } 203 | } 204 | layer { 205 | bottom: "conv4_3" 206 | top: "conv4_3" 207 | name: "relu4_3" 208 | type: "ReLU" 209 | } 210 | layer { 211 | bottom: "conv4_3" 212 | top: "pool4" 213 | name: "pool4" 214 | type: "Pooling" 215 | pooling_param { 216 | pool: MAX 217 | kernel_size: 2 218 | stride: 2 219 | } 220 | } 221 | layer { 222 | bottom: "pool4" 223 | top: "conv5_1" 224 | name: "conv5_1" 225 | type: "Convolution" 226 | convolution_param { 227 | num_output: 512 228 | pad: 1 229 | kernel_size: 3 230 | } 231 | } 232 | layer { 233 | bottom: "conv5_1" 234 | top: "conv5_1" 235 | name: "relu5_1" 236 | type: "ReLU" 237 | } 238 | layer { 239 | bottom: "conv5_1" 240 | top: "conv5_2" 241 | name: "conv5_2" 242 | type: "Convolution" 243 | convolution_param { 244 | num_output: 512 245 | pad: 1 246 | kernel_size: 3 247 | } 248 | } 249 | layer { 250 | bottom: "conv5_2" 251 | top: "conv5_2" 252 | name: "relu5_2" 253 | type: "ReLU" 254 | } 255 | layer { 256 | bottom: "conv5_2" 257 | top: "conv5_3" 258 | name: "conv5_3" 259 | type: "Convolution" 260 | convolution_param { 261 | num_output: 512 262 | pad: 1 263 | kernel_size: 3 264 | } 265 | } 266 | layer { 267 | bottom: "conv5_3" 268 | top: "conv5_3" 269 | name: "relu5_3" 270 | type: "ReLU" 271 | } 272 | layer { 273 | bottom: "conv5_3" 274 | top: "pool5" 275 | name: "pool5" 276 | type: "Pooling" 277 | pooling_param { 278 | pool: MAX 279 | kernel_size: 2 280 | stride: 2 281 | } 282 | } 283 | layer { 284 | bottom: "pool5" 285 | top: "fc6" 286 | name: "fc6" 287 | type: "InnerProduct" 288 | inner_product_param { 289 | num_output: 4096 290 | } 291 | } 292 | layer { 293 | bottom: "fc6" 294 | top: "fc6" 295 | name: "relu6" 296 | type: "ReLU" 297 | } 298 | layer { 299 | bottom: "fc6" 300 | top: "fc6" 301 | name: "drop6" 302 | type: "Dropout" 303 | dropout_param { 304 | dropout_ratio: 0.5 305 | } 306 | } 307 | layer { 308 | bottom: "fc6" 309 | top: "fc7" 310 | name: "fc7" 311 | type: "InnerProduct" 312 | inner_product_param { 313 | num_output: 4096 314 | } 315 | } 316 | layer { 317 | bottom: "fc7" 318 | top: "fc7" 319 | name: "relu7" 320 | type: "ReLU" 321 | } 322 | layer { 323 | bottom: "fc7" 324 | top: "fc7" 325 | name: "drop7" 326 | type: "Dropout" 327 | dropout_param { 328 | dropout_ratio: 0.5 329 | } 330 | } 331 | layer { 332 | bottom: "fc7" 333 | top: "score" 334 | name: "score" 335 | type: "InnerProduct" 336 | inner_product_param { 337 | num_output: 40 338 | } 339 | } 340 | layer { 341 | bottom: "score" 342 | top: "prob" 343 | name: "prob" 344 | type: "Sigmoid" 345 | } 346 | -------------------------------------------------------------------------------- /models/joint_square_loss/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/joint_square_loss/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 20000 6 | clip_gradients: 20 7 | momentum: 0.9 8 | weight_decay: 0.0005 9 | # caffe solver snapshotting is disabled 10 | snapshot: 0 11 | snapshot_prefix: "vgg16_att_cls_square_loss" 12 | # shut-down caffe display 13 | display: 0 14 | # shut-down caffe validation 15 | test_iter: 0 16 | test_interval: 1000 17 | -------------------------------------------------------------------------------- /models/joint_square_loss/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | bottom: "data" 9 | top: "conv1_1" 10 | name: "conv1_1" 11 | type: "Convolution" 12 | convolution_param { 13 | num_output: 64 14 | pad: 1 15 | kernel_size: 3 16 | } 17 | } 18 | layer { 19 | bottom: "conv1_1" 20 | top: "conv1_1" 21 | name: "relu1_1" 22 | type: "ReLU" 23 | } 24 | layer { 25 | bottom: "conv1_1" 26 | top: "conv1_2" 27 | name: "conv1_2" 28 | type: "Convolution" 29 | convolution_param { 30 | num_output: 64 31 | pad: 1 32 | kernel_size: 3 33 | } 34 | } 35 | layer { 36 | bottom: "conv1_2" 37 | top: "conv1_2" 38 | name: "relu1_2" 39 | type: "ReLU" 40 | } 41 | layer { 42 | bottom: "conv1_2" 43 | top: "pool1" 44 | name: "pool1" 45 | type: "Pooling" 46 | pooling_param { 47 | pool: MAX 48 | kernel_size: 2 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | bottom: "pool1" 54 | top: "conv2_1" 55 | name: "conv2_1" 56 | type: "Convolution" 57 | convolution_param { 58 | num_output: 128 59 | pad: 1 60 | kernel_size: 3 61 | } 62 | } 63 | layer { 64 | bottom: "conv2_1" 65 | top: "conv2_1" 66 | name: "relu2_1" 67 | type: "ReLU" 68 | } 69 | layer { 70 | bottom: "conv2_1" 71 | top: "conv2_2" 72 | name: "conv2_2" 73 | type: "Convolution" 74 | convolution_param { 75 | num_output: 128 76 | pad: 1 77 | kernel_size: 3 78 | } 79 | } 80 | layer { 81 | bottom: "conv2_2" 82 | top: "conv2_2" 83 | name: "relu2_2" 84 | type: "ReLU" 85 | } 86 | layer { 87 | bottom: "conv2_2" 88 | top: "pool2" 89 | name: "pool2" 90 | type: "Pooling" 91 | pooling_param { 92 | pool: MAX 93 | kernel_size: 2 94 | stride: 2 95 | } 96 | } 97 | layer { 98 | bottom: "pool2" 99 | top: "conv3_1" 100 | name: "conv3_1" 101 | type: "Convolution" 102 | convolution_param { 103 | num_output: 256 104 | pad: 1 105 | kernel_size: 3 106 | } 107 | } 108 | layer { 109 | bottom: "conv3_1" 110 | top: "conv3_1" 111 | name: "relu3_1" 112 | type: "ReLU" 113 | } 114 | layer { 115 | bottom: "conv3_1" 116 | top: "conv3_2" 117 | name: "conv3_2" 118 | type: "Convolution" 119 | convolution_param { 120 | num_output: 256 121 | pad: 1 122 | kernel_size: 3 123 | } 124 | } 125 | layer { 126 | bottom: "conv3_2" 127 | top: "conv3_2" 128 | name: "relu3_2" 129 | type: "ReLU" 130 | } 131 | layer { 132 | bottom: "conv3_2" 133 | top: "conv3_3" 134 | name: "conv3_3" 135 | type: "Convolution" 136 | convolution_param { 137 | num_output: 256 138 | pad: 1 139 | kernel_size: 3 140 | } 141 | } 142 | layer { 143 | bottom: "conv3_3" 144 | top: "conv3_3" 145 | name: "relu3_3" 146 | type: "ReLU" 147 | } 148 | layer { 149 | bottom: "conv3_3" 150 | top: "pool3" 151 | name: "pool3" 152 | type: "Pooling" 153 | pooling_param { 154 | pool: MAX 155 | kernel_size: 2 156 | stride: 2 157 | } 158 | } 159 | layer { 160 | bottom: "pool3" 161 | top: "conv4_1" 162 | name: "conv4_1" 163 | type: "Convolution" 164 | convolution_param { 165 | num_output: 512 166 | pad: 1 167 | kernel_size: 3 168 | } 169 | } 170 | layer { 171 | bottom: "conv4_1" 172 | top: "conv4_1" 173 | name: "relu4_1" 174 | type: "ReLU" 175 | } 176 | layer { 177 | bottom: "conv4_1" 178 | top: "conv4_2" 179 | name: "conv4_2" 180 | type: "Convolution" 181 | convolution_param { 182 | num_output: 512 183 | pad: 1 184 | kernel_size: 3 185 | } 186 | } 187 | layer { 188 | bottom: "conv4_2" 189 | top: "conv4_2" 190 | name: "relu4_2" 191 | type: "ReLU" 192 | } 193 | layer { 194 | bottom: "conv4_2" 195 | top: "conv4_3" 196 | name: "conv4_3" 197 | type: "Convolution" 198 | convolution_param { 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | } 203 | } 204 | layer { 205 | bottom: "conv4_3" 206 | top: "conv4_3" 207 | name: "relu4_3" 208 | type: "ReLU" 209 | } 210 | layer { 211 | bottom: "conv4_3" 212 | top: "pool4" 213 | name: "pool4" 214 | type: "Pooling" 215 | pooling_param { 216 | pool: MAX 217 | kernel_size: 2 218 | stride: 2 219 | } 220 | } 221 | layer { 222 | bottom: "pool4" 223 | top: "conv5_1" 224 | name: "conv5_1" 225 | type: "Convolution" 226 | convolution_param { 227 | num_output: 512 228 | pad: 1 229 | kernel_size: 3 230 | } 231 | } 232 | layer { 233 | bottom: "conv5_1" 234 | top: "conv5_1" 235 | name: "relu5_1" 236 | type: "ReLU" 237 | } 238 | layer { 239 | bottom: "conv5_1" 240 | top: "conv5_2" 241 | name: "conv5_2" 242 | type: "Convolution" 243 | convolution_param { 244 | num_output: 512 245 | pad: 1 246 | kernel_size: 3 247 | } 248 | } 249 | layer { 250 | bottom: "conv5_2" 251 | top: "conv5_2" 252 | name: "relu5_2" 253 | type: "ReLU" 254 | } 255 | layer { 256 | bottom: "conv5_2" 257 | top: "conv5_3" 258 | name: "conv5_3" 259 | type: "Convolution" 260 | convolution_param { 261 | num_output: 512 262 | pad: 1 263 | kernel_size: 3 264 | } 265 | } 266 | layer { 267 | bottom: "conv5_3" 268 | top: "conv5_3" 269 | name: "relu5_3" 270 | type: "ReLU" 271 | } 272 | layer { 273 | bottom: "conv5_3" 274 | top: "pool5" 275 | name: "pool5" 276 | type: "Pooling" 277 | pooling_param { 278 | pool: MAX 279 | kernel_size: 2 280 | stride: 2 281 | } 282 | } 283 | layer { 284 | bottom: "pool5" 285 | top: "fc6" 286 | name: "fc6" 287 | type: "InnerProduct" 288 | inner_product_param { 289 | num_output: 4096 290 | } 291 | } 292 | layer { 293 | bottom: "fc6" 294 | top: "fc6" 295 | name: "relu6" 296 | type: "ReLU" 297 | } 298 | layer { 299 | bottom: "fc6" 300 | top: "fc6" 301 | name: "drop6" 302 | type: "Dropout" 303 | dropout_param { 304 | dropout_ratio: 0.5 305 | } 306 | } 307 | layer { 308 | bottom: "fc6" 309 | top: "fc7" 310 | name: "fc7" 311 | type: "InnerProduct" 312 | inner_product_param { 313 | num_output: 4096 314 | } 315 | } 316 | layer { 317 | bottom: "fc7" 318 | top: "fc7" 319 | name: "relu7" 320 | type: "ReLU" 321 | } 322 | layer { 323 | bottom: "fc7" 324 | top: "fc7" 325 | name: "drop7" 326 | type: "Dropout" 327 | dropout_param { 328 | dropout_ratio: 0.5 329 | } 330 | } 331 | layer { 332 | bottom: "fc7" 333 | top: "score" 334 | name: "score" 335 | type: "InnerProduct" 336 | inner_product_param { 337 | num_output: 40 338 | } 339 | } 340 | layer { 341 | bottom: "score" 342 | top: "prob" 343 | name: "porb" 344 | type: "Sigmoid" 345 | } 346 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # Initialize all modules 2 | 3 | # ------------------------------------------------------ 4 | # Facial Attribute Classifer 5 | # Written by Yongxi Lu 6 | # ------------------------------------------------------ 7 | 8 | import os.path as osp 9 | import sys 10 | 11 | def add_path(path): 12 | if path not in sys.path: 13 | sys.path.insert(0, path) 14 | 15 | this_dir = osp.dirname(__file__) 16 | 17 | # Add caffe to PYTHONPATH 18 | caffe_path = osp.join(this_dir, '..', 'caffe', 'python') 19 | add_path(caffe_path) 20 | 21 | # Add lib to PYTHONPATH 22 | lib_path = osp.join(this_dir, '..', 'lib') 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /tools/convert_bn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # --------------------- 4 | # Written by Yongxi Lu 5 | # --------------------- 6 | 7 | """ For a caffemodel with BN layers, modify the filter weights and bias so that 8 | all BN layers can be removed without affecting the performance at deployment time. 9 | """ 10 | 11 | import _init_paths 12 | from caffe.proto import caffe_pb2 13 | import numpy as np 14 | import argparse 15 | import cPickle 16 | from scipy.io import savemat 17 | import os.path as osp 18 | import sys 19 | 20 | from utils.config import cfg 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description="Convert the caffemodel if there is any BN layers.") 27 | parser.add_argument('--inmodel', dest='inmodel', 28 | help='the input caffemodel', 29 | default=None, type=str) 30 | parser.add_argument('--outmodel', dest='outmodel', 31 | help='the output caffemodel', 32 | default=None, type=str) 33 | 34 | if len(sys.argv) == 1: 35 | parser.print_help() 36 | sys.exit(1) 37 | 38 | args = parser.parse_args() 39 | return args 40 | 41 | if __name__ == '__main__': 42 | 43 | args = parse_args() 44 | 45 | print('Called with args:') 46 | print(args) 47 | # load files 48 | print 'Loading caffemodel: {}'.format(args.inmodel) 49 | with open(args.inmodel, 'rb') as f: 50 | binary_content = f.read() 51 | 52 | protobuf = caffe_pb2.NetParameter() 53 | protobuf.ParseFromString(binary_content) 54 | layers = protobuf.layer 55 | 56 | _eps = 1e-5 57 | for layer in layers: 58 | if layer.type == 'BatchNorm': 59 | # the layer to be modified. 60 | layer_c = [l for l in layers if l.name == layer.name[3:]][0] 61 | # the parameters fo the computational layer 62 | w = np.reshape(np.array(layer_c.blobs[0].data), layer_c.blobs[0].shape.dim) 63 | b = np.reshape(np.array(layer_c.blobs[1].data), layer_c.blobs[1].shape.dim) 64 | # load the BN parameters 65 | factor = 0 if np.array(layer.blobs[2].data) == 0 else 1./np.array(layer.blobs[2].data) 66 | mean = np.array(layer.blobs[0].data) * factor 67 | var = np.array(layer.blobs[1].data) * factor 68 | 69 | # display information 70 | print 'Modifying layer {} based on information from {}'.format(layer_c.name, layer.name) 71 | # update weights 72 | if len(w.shape) == 4: 73 | w /= (_eps + np.sqrt(var)[:, np.newaxis, np.newaxis, np.newaxis]) 74 | elif len(w.shape) == 2: 75 | w /= (_eps + np.sqrt(var)[:, np.newaxis]) 76 | # update bias 77 | b -= mean 78 | b /= (_eps + np.sqrt(var)) 79 | # save the changes back to the model 80 | del layer_c.blobs[0].data[:] 81 | del layer_c.blobs[1].data[:] 82 | layer_c.blobs[0].data.extend(w.flatten().tolist()) 83 | layer_c.blobs[1].data.extend(b.flatten().tolist()) 84 | 85 | # save the model to out model 86 | new_binary_content = protobuf.SerializeToString() 87 | 88 | print 'Saving caffemodel: {}'.format(args.outmodel) 89 | with open(args.outmodel, 'wb') as f: 90 | f.write(new_binary_content) -------------------------------------------------------------------------------- /tools/convert_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # --------------------- 4 | # Written by Yongxi Lu 5 | # --------------------- 6 | 7 | """ Convert caffemodel into a format that is easier to interpret offline """ 8 | 9 | import _init_paths 10 | from caffe.proto import caffe_pb2 11 | import numpy as np 12 | import argparse 13 | import cPickle 14 | from scipy.io import savemat 15 | import os.path as osp 16 | import sys 17 | 18 | def parse_args(): 19 | """ 20 | Parse input arguments 21 | """ 22 | parser = argparse.ArgumentParser(description="Load caffemodel and save it as a dictionary.") 23 | parser.add_argument('--caffemodel', dest='caffemodel', 24 | help='th caffemodel', 25 | default=None, type=str) 26 | parser.add_argument('--types', dest='layer_types', 27 | help="types of layers to save", 28 | default=None, type=str, nargs='*') 29 | parser.add_argument('--output', dest='output', 30 | help="name of the output file", 31 | default=None, type=str) 32 | 33 | if len(sys.argv) == 1: 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | args = parser.parse_args() 38 | return args 39 | 40 | if __name__ == '__main__': 41 | 42 | args = parse_args() 43 | 44 | print('Called with args:') 45 | print(args) 46 | # load files 47 | print 'Loading caffemodel: {}'.format(args.caffemodel) 48 | with open(args.caffemodel, 'rb') as f: 49 | binary_content = f.read() 50 | 51 | protobuf = caffe_pb2.NetParameter() 52 | protobuf.ParseFromString(binary_content) 53 | layers = protobuf.layer 54 | 55 | params = {} 56 | for layer in layers: 57 | if layer.type in args.layer_types: 58 | print (layer.name, layer.type) 59 | params[layer.name+'_w'] = np.reshape(np.array(layer.blobs[0].data), layer.blobs[0].shape.dim) 60 | params[layer.name+'_b'] = np.reshape(np.array(layer.blobs[1].data), layer.blobs[1].shape.dim) 61 | print params[layer.name+'_w'].shape, params[layer.name+'_b'].shape 62 | 63 | # save the layers into a file 64 | # if the file name is .pkl, save to pickle file. 65 | # if the file name is .mat, save to mat file. 66 | # otherwise, report file type not recognized. 67 | file_type = osp.splitext(args.output)[1] 68 | if file_type == '.pkl': 69 | with open(args.output, 'wb') as f: 70 | cPickle.dump(params, f, cPickle.HIGHEST_PROTOCOL) 71 | print 'Wrote converted caffemodel to {}'.format(args.output) 72 | elif file_type == '.mat': 73 | # for key in params.keys(): 74 | # params[key.replace('-','_')] = params.pop(key) 75 | savemat(args.output, params) 76 | print 'Wrote converted caffemodel to {}'.format(args.output) 77 | else: 78 | print 'The output file type {} is not recognized!'.format(file_type) -------------------------------------------------------------------------------- /tools/ibm_simple_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Written by Yongxi Lu 4 | 5 | """ A lightweight testing procedure to evaluate the performance of 6 | individual attributes, exclusively for the IBMattributes datasest. 7 | """ 8 | 9 | import _init_paths 10 | from utils.config import cfg, cfg_from_file, cfg_set_path, get_output_dir 11 | from utils.blob import im_list_to_blob 12 | from utils.error import compute_mle 13 | import argparse 14 | import pprint 15 | import caffe 16 | import sys, os 17 | import os.path as osp 18 | import numpy as np 19 | import cPickle 20 | 21 | def parse_args(): 22 | """ 23 | Parse input arguments 24 | """ 25 | DATA_DIR = osp.join(osp.dirname(__file__), '..', 'data') 26 | 27 | parser = argparse.ArgumentParser(description="Test a caffemodel on IBM attributes dataset.") 28 | parser.add_argument('--gpu', dest='gpu_id', 29 | help='GPU device id to use [None]', 30 | default=None, type=int) 31 | parser.add_argument('--model', dest='model', 32 | help='test prototxt', 33 | default=None, type=str) 34 | parser.add_argument('--weights', dest='weights', 35 | help='trained caffemodel', 36 | default=None, type=str) 37 | parser.add_argument('--cfg', dest='cfg_file', 38 | help='optional config file', 39 | default=None, type=str) 40 | parser.add_argument('--mean_file', dest='mean_file', 41 | help='the path to the mean file to be used', 42 | default=None, type=str) 43 | parser.add_argument('--base_folder', dest='base_folder', 44 | help='the datapath to the root folder for the images', 45 | default=osp.join(DATA_DIR,'imdb_IBMAttributes', 'ValidationData'), type=str) 46 | parser.add_argument('--folders', dest='folders', 47 | help='the folders containing positive examples', 48 | default=None, type=str, nargs='*') 49 | 50 | if len(sys.argv) == 1: 51 | parser.print_help() 52 | sys.exit(1) 53 | 54 | args = parser.parse_args() 55 | return args 56 | 57 | if __name__ == '__main__': 58 | args = parse_args() 59 | 60 | print('Called with args:') 61 | print(args) 62 | 63 | if args.cfg_file is not None: 64 | cfg_from_file(args.cfg_file) 65 | 66 | # use mean file if provided 67 | if args.mean_file is not None: 68 | with open(args.mean_file, 'rb') as fid: 69 | cfg.PIXEL_MEANS = cPickle.load(fid) 70 | print 'mean values loaded from {}'.format(args.mean_file) 71 | 72 | print('Using config:') 73 | pprint.pprint(cfg) 74 | 75 | caffe.set_mode_gpu() 76 | caffe.set_device(args.gpu_id) 77 | net = caffe.Net(args.model, args.weights, caffe.TEST) 78 | net.name = os.path.splitext(os.path.basename(args.weights))[0] 79 | 80 | image_list = [] 81 | labels = np.zeros((0, ), dtype=np.int64) 82 | # number of classes 83 | num_classes = len(args.folders) 84 | for c in xrange(num_classes): 85 | src_folder = osp.join(args.base_folder, args.folders[c]) 86 | new_images = [osp.join(src_folder, fn) for fn in os.listdir(src_folder)] 87 | image_list.extend(new_images) 88 | new_labels = c*np.ones((len(new_images),), dtype=np.int64) 89 | labels = np.hstack((labels, new_labels)) 90 | 91 | # number of images 92 | num_images = len(image_list) 93 | # init error vector 94 | err = np.zeros((num_images, num_classes)) # in {0,1} format 95 | for i in xrange(num_images): 96 | # prepare blobs 97 | label_name = "prob" 98 | fn = image_list[i] 99 | data = im_list_to_blob([fn], cfg.PIXEL_MEANS, cfg.SCALE) 100 | net.blobs['data'].reshape(*(data.shape)) 101 | # forward the network 102 | blobs_out = net.forward(data=data.astype(np.float32, copy=False)) 103 | # get results 104 | scores = blobs_out[label_name] 105 | # evaluate the scores 106 | score_max = np.argmax(scores, axis=1) 107 | pred = np.zeros((1, num_classes)) 108 | pred[:, score_max] = 1.0 109 | target = np.zeros((1, num_classes)) 110 | target[:, labels[i]] = 1.0 111 | err[i,:] = compute_mle(pred, target) 112 | 113 | # print infos 114 | print 'Image {}/{}.'.format(i, num_images) 115 | 116 | # print out basic dataset information 117 | print '---------------------------------------------------------------' 118 | print '!!! Summary of results.' 119 | # get error for each class 120 | class_names = args.folders 121 | mean_err = np.mean(err, axis=0) 122 | for i in xrange(len(class_names)): 123 | print '!!! Error rate for class {} is: {}'.\ 124 | format(class_names[i], mean_err[i]) 125 | 126 | print '!!! The average error rate is {}.'.format(mean_err.mean()) 127 | print '---------------------------------------------------------------' 128 | -------------------------------------------------------------------------------- /tools/load_person.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------- 4 | # Written by Yongxi Lu 5 | #------------------------------- 6 | 7 | """Load soft labels for the PersonAttributes dataset""" 8 | 9 | import _init_paths 10 | from utils.config import cfg, cfg_from_file, cfg_set_path, get_output_dir 11 | from datasets.factory import get_imdb 12 | import argparse 13 | import pprint 14 | import caffe 15 | import sys, os 16 | import json 17 | 18 | from evaluation.test import eval_and_save 19 | 20 | def parse_args(): 21 | """ 22 | Parse input arguments 23 | """ 24 | parser = argparse.ArgumentParser(description="Collect evaluation results as soft labels.") 25 | parser.add_argument('--gpu', dest='gpu_id', 26 | help='GPU device id to use [None]', 27 | default=None, type=int) 28 | parser.add_argument('--cfg', dest='cfg_file', 29 | help='optional config file', 30 | default=None, type=str) 31 | parser.add_argument('--model_face', dest='model_face', 32 | help='prototxt for face model', 33 | default=None, type=str) 34 | parser.add_argument('--weights_face', dest='weights_face', 35 | help='caffemodel for face model', 36 | default=None, type=str) 37 | parser.add_argument('--model_clothes', dest='model_clothes', 38 | help='prototxt for clothes model', 39 | default=None, type=str) 40 | parser.add_argument('--weights_clothes', dest='weights_clothes', 41 | help='caffemodel for clothes model', 42 | default=None, type=str) 43 | parser.add_argument('--imdb_face', dest='imdb_face', 44 | help='dataset to evaluate the face model', 45 | default='person_clothes_train', type=str) 46 | parser.add_argument('--imdb_clothes', dest='imdb_clothes', 47 | help='dataset to evaluate the clothes model', 48 | default='person_face_train', type=str) 49 | parser.add_argument('--mean_file', dest='mean_file', 50 | help='the path to the mean file to be used', 51 | default=None, type=str) 52 | 53 | if len(sys.argv) == 1: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | args = parser.parse_args() 58 | return args 59 | 60 | if __name__ == '__main__': 61 | args = parse_args() 62 | 63 | print('Called with args:') 64 | print(args) 65 | 66 | if args.cfg_file is not None: 67 | cfg_from_file(args.cfg_file) 68 | 69 | # use mean file if provided 70 | if args.mean_file is not None: 71 | with open(args.mean_file, 'rb') as fid: 72 | cfg.PIXEL_MEANS = cPickle.load(fid) 73 | print 'mean values loaded from {}'.format(args.mean_file) 74 | 75 | print('Using config:') 76 | pprint.pprint(cfg) 77 | 78 | caffe.set_mode_gpu() 79 | caffe.set_device(args.gpu_id) 80 | 81 | # save soft labels for face model if the file does not already exists 82 | imdb = get_imdb(args.imdb_face) 83 | fn = os.path.join(imdb.data_path, imdb.name+'.pkl') 84 | if not os.path.exists(fn): 85 | net = caffe.Net(args.model_face, args.weights_face, caffe.TEST) 86 | net.name = os.path.splitext(os.path.basename(args.weights_face))[0] 87 | # parse class_id 88 | classid_name = os.path.splitext(args.weights_face)[0] + '.clsid' 89 | with open(classid_name, 'rb') as f: 90 | class_id = json.loads(f.read()) 91 | 92 | eval_and_save(net, imdb, class_id) 93 | else: 94 | print '{} already exists!'.format(fn) 95 | 96 | # save soft labels for face model 97 | imdb = get_imdb(args.imdb_clothes) 98 | fn = os.path.join(imdb.data_path, imdb.name+'.pkl') 99 | if not os.path.exists(fn): 100 | net = caffe.Net(args.model_clothes, args.weights_clothes, caffe.TEST) 101 | net.name = os.path.splitext(os.path.basename(args.weights_clothes))[0] 102 | # parse class_id 103 | classid_name = os.path.splitext(args.weights_clothes)[0] + '.clsid' 104 | with open(classid_name, 'rb') as f: 105 | class_id = json.loads(f.read()) 106 | eval_and_save(net, imdb, class_id) 107 | else: 108 | print '{} already exists!'.format(fn) 109 | -------------------------------------------------------------------------------- /tools/parse_log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------- 4 | # Written by Yongxi Lu 5 | #------------------------------- 6 | 7 | 8 | """ 9 | Parse a given log 10 | """ 11 | 12 | import _init_paths 13 | from utils.log import parse_mle_and_plot 14 | import argparse 15 | import pprint 16 | import numpy as np 17 | import sys, os 18 | 19 | def parse_args(): 20 | """ 21 | Parse input arguments 22 | """ 23 | parser = argparse.ArgumentParser(description="Train a model for Facial Attribute Classification") 24 | parser.add_argument('--log', dest='log_file', 25 | help="name of the log file", 26 | default=None, type=str, nargs='*') 27 | parser.add_argument('--output', dest='output', 28 | help="name of the output file", 29 | default='loss.png', type=str) 30 | parser.add_argument('--run', dest='run', 31 | help="run length in smoothing the training set", 32 | default=50, type=int) 33 | parser.add_argument('--max_y', dest='max_y', 34 | help="maximum value of y axis", 35 | default=None, type=float) 36 | parser.add_argument('--metric', dest='metric', 37 | help="the type metric used in evaluation", 38 | default='error', type=str) 39 | 40 | if len(sys.argv) == 1: 41 | parser.print_help() 42 | sys.exit(1) 43 | 44 | args = parser.parse_args() 45 | return args 46 | 47 | if __name__ == '__main__': 48 | args = parse_args() 49 | 50 | print('Called with args:') 51 | print(args) 52 | 53 | if args.log_file is not None: 54 | parse_mle_and_plot(args.log_file, ['training','validation'], metric=args.metric, 55 | output=args.output, run_length=args.run, max_y=args.max_y) -------------------------------------------------------------------------------- /tools/parse_log_and_save.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------- 4 | # Written by Yongxi Lu 5 | #------------------------------- 6 | 7 | 8 | """Parse a given log, and save to a .mat file. """ 9 | 10 | import _init_paths 11 | from utils.log import parse_mle 12 | import argparse 13 | import pprint 14 | import numpy as np 15 | import sys, os 16 | import os.path as osp 17 | 18 | from scipy.io import savemat 19 | 20 | def parse_args(): 21 | """Parse input arguments """ 22 | parser = argparse.ArgumentParser(description="Parse the log file and save it as a .mat file") 23 | parser.add_argument('--log', dest='log_file', 24 | help="name of the log file", 25 | default=None, type=str, nargs='*') 26 | parser.add_argument('--outpath', dest='outpath', 27 | help="name of the output path", 28 | default='.', type=str) 29 | parser.add_argument('--split', dest='split', 30 | help="the split to parse", 31 | default=None, type=str, nargs='*') 32 | parser.add_argument('--metric', dest='metric', 33 | help="the type metric used in evaluation", 34 | default='error', type=str) 35 | 36 | if len(sys.argv) == 1: 37 | parser.print_help() 38 | sys.exit(1) 39 | 40 | args = parser.parse_args() 41 | return args 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | 46 | print('Called with args:') 47 | print(args) 48 | 49 | for log_file in args.log_file: 50 | params = {} 51 | filename = osp.splitext(log_file)[0] + '.mat' 52 | for split in args.split: 53 | iters, err = parse_mle(log_file, split, args.metric) 54 | params[split+'iters'] = iters 55 | params[split+'err'] = err 56 | savemat(filename, params) -------------------------------------------------------------------------------- /tools/pixel_means.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ---------------------- 4 | # Written by Yongxi Lu 5 | # ---------------------- 6 | 7 | """Compute the pixel means of a given dataset, and save the results to a path""" 8 | 9 | import _init_paths 10 | from datasets.factory import get_imdb 11 | import numpy as np 12 | import argparse 13 | import sys 14 | import cv2 15 | import cPickle 16 | import os.path as osp 17 | 18 | def parse_args(): 19 | """ 20 | Parse input arguments 21 | """ 22 | parser = argparse.ArgumentParser(description='Compute pixel means of imdb') 23 | parser.add_argument('--imdb', dest='imdb_name', 24 | help='dataset to compute pixel means', 25 | default='celeba_trainval', type=str) 26 | parser.add_argument('--path', dest='path', 27 | help='the path to save the mean file', 28 | default=osp.join(osp.dirname(__file__),'..','data','cache'), 29 | type=str) 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | if __name__ == '__main__': 35 | args = parse_args() 36 | 37 | print('Called with args:') 38 | print(args) 39 | 40 | imdb = get_imdb(args.imdb_name) 41 | num_images = imdb.num_images 42 | 43 | # means of pixel values, in BGR order 44 | means = np.zeros((3,)) 45 | num_pixels = 0.0 46 | 47 | for i in xrange(num_images): 48 | im = cv2.imread(imdb.image_path_at(i)) 49 | im_means = im.mean(axis=(0,1)) 50 | im_num_pixels = float(im.shape[0] * im.shape[1]) 51 | means = means * num_pixels / (num_pixels + im_num_pixels) \ 52 | + im_means * im_num_pixels / (num_pixels + im_num_pixels) 53 | num_pixels = num_pixels + im_num_pixels 54 | 55 | if i % 1000 == 0 or i == num_images-1: 56 | print 'Processing {}/{}, the mean is ({})'.format(i, num_images,means) 57 | 58 | # convert means to (1,1,3) array 59 | means = means[np.newaxis, np.newaxis, :] 60 | 61 | # save to cache 62 | mean_file = osp.join(args.path, args.imdb_name+'_mean_file.pkl') 63 | with open(mean_file, 'wb') as fid: 64 | cPickle.dump(means, fid, cPickle.HIGHEST_PROTOCOL) 65 | print 'wrote mean values to {}'.format(mean_file) -------------------------------------------------------------------------------- /tools/save_softlabels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------- 4 | # Written by Yongxi Lu 5 | #------------------------------- 6 | 7 | """Collect labels from individual attributes to be used as soft labels""" 8 | 9 | import _init_paths 10 | from utils.config import cfg, cfg_from_file, cfg_set_path, get_output_dir 11 | from datasets.factory import get_imdb 12 | import argparse 13 | import pprint 14 | import caffe 15 | import sys, os 16 | import json 17 | from evaluation.test import save_softlabels 18 | 19 | def parse_args(): 20 | """ 21 | Parse input arguments 22 | """ 23 | parser = argparse.ArgumentParser(description="Collect evaluation results as soft labels.") 24 | parser.add_argument('--gpu', dest='gpu_id', 25 | help='GPU device id to use [None]', 26 | default=None, type=int) 27 | parser.add_argument('--cfg', dest='cfg_file', 28 | help='optional config file', 29 | default=None, type=str) 30 | parser.add_argument('--imdb', dest='imdb_name', 31 | help='datasets to test on', 32 | default='celeba_test', type=str) 33 | parser.add_argument('--mean_file', dest='mean_file', 34 | help='the path to the mean file to be used', 35 | default=None, type=str) 36 | parser.add_argument('--test_class', dest='test_class', 37 | help='the index to the soft labels requested', 38 | default=None, type=int) 39 | 40 | if len(sys.argv) == 1: 41 | parser.print_help() 42 | sys.exit(1) 43 | 44 | args = parser.parse_args() 45 | return args 46 | 47 | if __name__ == '__main__': 48 | args = parse_args() 49 | 50 | print('Called with args:') 51 | print(args) 52 | 53 | if args.cfg_file is not None: 54 | cfg_from_file(args.cfg_file) 55 | 56 | # use mean file if provided 57 | if args.mean_file is not None: 58 | with open(args.mean_file, 'rb') as fid: 59 | cfg.PIXEL_MEANS = cPickle.load(fid) 60 | print 'mean values loaded from {}'.format(args.mean_file) 61 | 62 | print('Using config:') 63 | pprint.pprint(cfg) 64 | 65 | caffe.set_mode_gpu() 66 | caffe.set_device(args.gpu_id) 67 | 68 | # get imdb 69 | imdb = get_imdb(args.imdb_name) 70 | # iterate over classes 71 | if args.test_class is None: 72 | class_list = xrange(imdb.num_classes) 73 | else: 74 | class_list = [args.test_class] 75 | for c in class_list: 76 | # get file name 77 | score_file = imdb.score_file_name(c) 78 | # skip if there is no reason for this purpose. 79 | if os.path.exists(score_file): 80 | print 'Skipping saving soft labels for {}: {} already exists...'.\ 81 | format(imdb.classes[c], score_file) 82 | continue 83 | 84 | # find out the caffemodels [caffemodel, score_name, score_idx] 85 | src_name, labeler = imdb.find_labeler(c) 86 | weights = labeler[0] 87 | prototxt = os.path.splitext(labeler[0])[0] + '.prototxt' 88 | # caffemodel is the first entry 89 | net = caffe.Net(prototxt, weights, caffe.TEST) 90 | net.name = os.path.splitext(os.path.basename(weights))[0] 91 | # list of images that need to be evaluated 92 | image_list = imdb.image_path_at_inds(imdb.list_incomplete(c)) 93 | print 'Start saving soft labels for {} from {}, using layer {} index {}'.\ 94 | format(imdb.classes[c], weights, labeler[1], labeler[2]) 95 | save_softlabels(net, image_list, score_file, labeler) 96 | -------------------------------------------------------------------------------- /tools/test_cls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #------------------------------- 4 | # Written by Yongxi Lu 5 | #------------------------------- 6 | 7 | """Test classificaiton accuracy""" 8 | 9 | # TODO: add top-k recall as another metric. 10 | 11 | import _init_paths 12 | from utils.config import cfg, cfg_from_file, cfg_set_path, get_output_dir 13 | from datasets.factory import get_imdb 14 | import argparse 15 | import pprint 16 | import caffe 17 | import sys, os 18 | import json 19 | from evaluation.test import test_cls_error, test_cls_topk 20 | 21 | import yaml 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description="Test the performance on a dataset.") 28 | parser.add_argument('--gpu', dest='gpu_id', 29 | help='GPU device id to use [None]', 30 | default=None, type=int) 31 | parser.add_argument('--model', dest='model', 32 | help='test prototxt', 33 | default=None, type=str) 34 | parser.add_argument('--weights', dest='weights', 35 | help='trained caffemodel', 36 | default=None, type=str) 37 | parser.add_argument('--metric', dest='metric', 38 | help='the metric used in evaluatoin', 39 | default='error_rate',type=str) 40 | parser.add_argument('--cfg', dest='cfg_file', 41 | help='optional config file', 42 | default=None, type=str) 43 | parser.add_argument('--imdb', dest='imdb_name', 44 | help='datasets to test on', 45 | default='celeba_test', type=str) 46 | parser.add_argument('--mean_file', dest='mean_file', 47 | help='the path to the mean file to be used', 48 | default=None, type=str) 49 | 50 | if len(sys.argv) == 1: 51 | parser.print_help() 52 | sys.exit(1) 53 | 54 | args = parser.parse_args() 55 | return args 56 | 57 | if __name__ == '__main__': 58 | args = parse_args() 59 | 60 | print('Called with args:') 61 | print(args) 62 | 63 | if args.cfg_file is not None: 64 | cfg_from_file(args.cfg_file) 65 | 66 | # use mean file if provided 67 | if args.mean_file is not None: 68 | with open(args.mean_file, 'rb') as fid: 69 | cfg.PIXEL_MEANS = cPickle.load(fid) 70 | print 'mean values loaded from {}'.format(args.mean_file) 71 | 72 | print('Using config:') 73 | pprint.pprint(cfg) 74 | 75 | caffe.set_mode_gpu() 76 | caffe.set_device(args.gpu_id) 77 | net = caffe.Net(args.model, args.weights, caffe.TEST) 78 | net.name = os.path.splitext(os.path.basename(args.weights))[0] 79 | 80 | # get imdb 81 | imdb = get_imdb(args.imdb_name) 82 | # parse class_id 83 | classid_name = os.path.splitext(args.weights)[0] + '.clsid' 84 | with open(classid_name, 'rb') as f: 85 | class_id = json.loads(f.read()) 86 | 87 | if args.metric == 'error_rate': 88 | test_cls_error(net, imdb, class_id) 89 | elif args.metric == 'top-3': 90 | test_cls_topk(net, imdb, class_id, k=3) 91 | elif args.metric == 'top-5': 92 | test_cls_topk(net, imdb, class_id, k=5) 93 | elif args.metric == 'top-10': 94 | test_cls_topk(net, imdb, class_id, k=10) -------------------------------------------------------------------------------- /tools/test_cluster.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Written by Yongxi Lu 4 | 5 | """ Test clustering of tasks """ 6 | 7 | import _init_paths 8 | from evaluation.cluster import MultiLabel_ECM_cluster 9 | from utils.config import cfg, cfg_from_file, cfg_set_path, get_output_dir 10 | from datasets.factory import get_imdb 11 | import caffe 12 | import argparse 13 | import pprint 14 | import numpy as np 15 | import sys, os 16 | 17 | import json 18 | 19 | def parse_args(): 20 | """ 21 | Parse input arguments 22 | """ 23 | parser = argparse.ArgumentParser(description="Find clusters.") 24 | parser.add_argument('--gpu', dest='gpu_id', 25 | help='GPU device id to use [None]', 26 | default=None, type=int) 27 | parser.add_argument('--model', dest='model', 28 | help='test prototxt', 29 | default=None, type=str) 30 | parser.add_argument('--weights', dest='weights', 31 | help='trained caffemodel', 32 | default=None, type=str) 33 | parser.add_argument('--cfg', dest='cfg_file', 34 | help='optional config file', 35 | default=None, type=str) 36 | parser.add_argument('--imdb', dest='imdb_name', 37 | help='dataset to test on', 38 | default='celeba_val', type=str) 39 | parser.add_argument('--method', dest='method', 40 | help='the method used for clustering', 41 | default='ecm_pos', type=str) 42 | parser.add_argument('--cls_id', dest='cls_id', 43 | help='comma-separated list of classes to test', 44 | default=None, type=str) 45 | parser.add_argument('--n_cluster', dest='n_cluster', 46 | help='number of clusters', 47 | default=2, type=int) 48 | parser.add_argument('--mean_file', dest='mean_file', 49 | help='the path to the mean file to be used', 50 | default=None, type=str) 51 | 52 | if len(sys.argv) == 1: 53 | parser.print_help() 54 | sys.exit(1) 55 | 56 | args = parser.parse_args() 57 | return args 58 | 59 | if __name__ == '__main__': 60 | args = parse_args() 61 | 62 | print('Called with args:') 63 | print(args) 64 | 65 | if args.cfg_file is not None: 66 | cfg_from_file(args.cfg_file) 67 | 68 | # use mean file if provided 69 | if args.mean_file is not None: 70 | with open(args.mean_file, 'rb') as fid: 71 | cfg.PIXEL_MEANS = cPickle.load(fid) 72 | print 'mean values loaded from {}'.format(args.mean_file) 73 | 74 | print('Using config:') 75 | pprint.pprint(cfg) 76 | 77 | # set up caffe 78 | if args.gpu_id is not None: 79 | caffe.set_mode_gpu() 80 | caffe.set_device(args.gpu_id) 81 | else: 82 | caffe.set_mode_cpu() 83 | 84 | # set up the network model 85 | net = caffe.Net(args.model, args.weights, caffe.TEST) 86 | 87 | imdb = get_imdb(args.imdb_name) 88 | print 'Loaded dataset `{:s}` for testing'.format(imdb.name) 89 | 90 | # parse class_id 91 | classid_name = os.path.splitext(args.weights)[0] + '.clsid' 92 | with open(classid_name, 'rb') as f: 93 | class_id = json.loads(f.read()) 94 | 95 | if args.method == 'ecm': 96 | labels=MultiLabel_ECM_cluster(net, k=args.n_cluster, imdb=imdb, 97 | cls_idx=class_id, reverse=False) 98 | elif args.method == 'ecm_reverse': 99 | labels=MultiLabel_ECM_cluster(net, k=args.n_cluster, imdb=imdb, 100 | cls_idx=class_id, reverse=True) 101 | 102 | for i in xrange(args.n_cluster): 103 | print 'Cluster {} is: {}'.format(i, [class_id[j] for j in np.where(labels==i)[0]]) --------------------------------------------------------------------------------