├── cls ├── accuracy.png ├── inception │ └── README.md ├── resnet-v2 │ └── README.md ├── cls_lite │ └── README.md ├── vgg │ ├── deploy_vgg13-pytorch.prototxt │ ├── deploy_vgg16-5x.prototxt │ ├── deploy_vgg16-dsd.prototxt │ ├── deploy_vgg16-pytorch.prototxt │ ├── deploy_vgg16-tf.prototxt │ ├── deploy_vgg19-pytorch.prototxt │ └── deploy_vgg13bn-pytorch.prototxt ├── evaluation_cls.py └── README.md ├── .gitmodules ├── det ├── rfcn │ ├── README.md │ ├── models │ │ ├── pascal_voc │ │ │ ├── solver.prototxt │ │ │ ├── air101 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_air101-merge.prototxt │ │ │ ├── resnet101-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt │ │ │ ├── resnet18 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt │ │ │ ├── inception-v4 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt │ │ │ ├── resnext101-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt │ │ │ ├── resnext101-64x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt │ │ │ ├── se-inception-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt │ │ │ └── resnext26-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt │ │ └── coco │ │ │ ├── air101 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_coco_air101-merge.prototxt │ │ │ ├── inception-v4 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt │ │ │ └── resnext101-32x4d │ │ │ └── rpn_rcnn_deploys │ │ │ └── rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt │ ├── experiments │ │ └── cfgs │ │ │ ├── rfcn_end2end.yml │ │ │ └── rfcn_end2end_ohem.yml │ └── tools │ │ ├── train_net_multi_gpu.py │ │ └── score.py ├── faster_rcnn │ ├── models │ │ ├── pascal_voc │ │ │ ├── solver.prototxt │ │ │ ├── airx101-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt │ │ │ ├── 2007test400.txt │ │ │ ├── resnet18 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt │ │ │ ├── mobilenet │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_mobilenet-dw.prototxt │ │ │ ├── xception │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt │ │ │ ├── resnet38a │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet38a-merge.prototxt │ │ │ ├── resnet101-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt │ │ │ └── resnet152-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt │ │ └── coco │ │ │ ├── air101 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt │ │ │ └── inception-v4 │ │ │ └── rpn_rcnn_deploys │ │ │ └── rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt │ ├── experiments │ │ └── cfgs │ │ │ ├── faster_rcnn_end2end.yml │ │ │ └── faster_rcnn_end2end_ohem.yml │ ├── README.md │ └── tools │ │ ├── train_net_multi_gpu.py │ │ └── score.py ├── MSCOCO_Benchmark.md ├── README.md └── VOC_Benchmark.md ├── LICENSE ├── seg ├── pspnet │ └── tools │ │ ├── train_net_multi.py │ │ └── image_seg_data.py ├── score_seg.py ├── README.md └── evaluation_seg.py └── README.md /cls/accuracy.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/caffe-model/HEAD/cls/accuracy.png
--------------------------------------------------------------------------------
/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pypriv"] 2 | path = pypriv 3 | url = https://github.com/soeaver/pypriv.git 4 |
--------------------------------------------------------------------------------
/det/rfcn/README.md: -------------------------------------------------------------------------------- 1 | ## RFCN 2 | ### Training R-FCN networks on PASCAL VOC 3 | 4 | 1. Download the network weights pre-trained on ImageNet. 5 |
--------------------------------------------------------------------------------
/cls/inception/README.md: -------------------------------------------------------------------------------- 1 | ### Note 2 | We adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files) in deploy_xception-dw.prototxt to speed up inference. 3 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "./rfcn_voc_resnet101-v2-merge.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "multistep" 4 | gamma: 0.1 5 | # stepsize: 30000 6 | stepvalue: 80000 7 | # stepvalue: 70000 8 | display: 20 9 | average_loss: 100 10 | # iter_size: 1 11 | momentum: 0.9 12 | weight_decay: 0.0001 13 | # We disable standard caffe solver snapshotting and implement our own snapshot 14 | # function 15 | snapshot: 0 16 | # We still use the snapshot prefix, though 17 | snapshot_prefix: "rfcn_voc_resnet101-v2" 18 | iter_size: 1 19 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2/faster_voc_resnet101-v2-merge.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "multistep" 4 | gamma: 0.1 5 | stepvalue: 50000 6 | display: 20 7 | average_loss: 100 8 | 9 | momentum: 0.9 10 | weight_decay: 0.0001 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "faster_voc_resnet101-v2" 16 | iter_size: 1 17 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2 2 | EXP_DIR: faster_rcnn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_BATCHSIZE: 256 16 | BATCH_SIZE: 128 17 | PROPOSAL_METHOD: gt 18 | BG_THRESH_LO: 0.0 19 | ASPECT_GROUPING: True 20 | TEST: 21 | HAS_RPN: True 22 | SCALES: [600] 23 | MAX_SIZE: 1000 24 | 25 |
--------------------------------------------------------------------------------
/cls/resnet-v2/README.md:
--------------------------------------------------------------------------------
1 | ### Resnet-v2 2 | 3 | At present, we have not finished the generator scripts for resnet-v2; [ResNet_with_IdentityMapping](https://github.com/MichaelHunson/ResNet_with_IdentityMapping) may be useful in the meantime. 4 | 5 | The details are described in the paper **Identity Mappings in Deep Residual Networks** (https://arxiv.org/abs/1603.05027). 6 | 7 | The caffe models are converted from **craftGBD** (https://github.com/craftGBD/craftGBD). 8 | The craftGBD models use a modified BN layer, so we manually converted the custom 'bn_layer' into the official 'batch_norm_layer' plus 'scale_layer'. 9 | 10 | ### Notes 11 | - We thank **craftGBD** (https://github.com/craftGBD/craftGBD) for training the models. 12 | - The layer naming differs somewhat from the craftGBD version. 13 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss 2 | EXP_DIR: rfcn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [200, 400, 600, 800] 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | # ONLY_INSIDE_ANCHORS: False 16 | RPN_BATCHSIZE: 256 17 | BATCH_SIZE: 128 18 | PROPOSAL_METHOD: gt 19 | BG_THRESH_LO: 0.1 20 | AGNOSTIC: True 21 | RPN_PRE_NMS_TOP_N: 6000 22 | RPN_POST_NMS_TOP_N: 300 23 | TEST: 24 | HAS_RPN: True 25 | SCALES: [600] 26 | MAX_SIZE: 1000 27 | 28 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end_ohem.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss-ohem 2 | EXP_DIR: rfcn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss-ohem' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [200, 400, 600, 800] 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_NORMALIZE_TARGETS: True 16 | # ONLY_INSIDE_ANCHORS: False 17 | RPN_BATCHSIZE: 256 18 | BATCH_SIZE: -1 19 | PROPOSAL_METHOD: gt 20 | BG_THRESH_LO: 0.0 21 | AGNOSTIC: True 22 | RPN_PRE_NMS_TOP_N: 6000 23 | RPN_POST_NMS_TOP_N: 300 24 | TEST: 25 | HAS_RPN: True 26 | SCALES: [600] 27 | MAX_SIZE: 1000 28 | 29 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end_ohem.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2 2 | EXP_DIR: faster_rcnn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss-ohem' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_NORMALIZE_TARGETS: True 16 | # ONLY_INSIDE_ANCHORS: False 17 | RPN_BATCHSIZE: 256 18 |
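  # Note (added annotation): with OHEM the R-CNN minibatch is not subsampled;
  # BATCH_SIZE is set to -1 below so the loss is evaluated for all proposals and
  # only the hardest examples are backpropagated (behavior assumed from py-R-FCN's
  # OHEM implementation; compare BATCH_SIZE: 128 in the non-OHEM cfg above).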
BATCH_SIZE: -1 19 | RPN_PRE_NMS_TOP_N: 6000 20 | RPN_POST_NMS_TOP_N: 300 21 | PROPOSAL_METHOD: gt 22 | BG_THRESH_LO: 0.0 23 | TEST: 24 | HAS_RPN: True 25 | SCALES: [600] 26 | MAX_SIZE: 1000 27 | 28 |
--------------------------------------------------------------------------------
/det/faster_rcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Faster RCNN 2 | ### Training Faster R-CNN networks on PASCAL VOC 3 | 4 | 1. Download the network weights pre-trained on ImageNet. 5 | 6 | 7 | 2. Modify the solver file 8 | ``` 9 | caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt 10 | ``` 11 | - You need to modify 'train_net' and 'snapshot_prefix' to the correct path and name. 12 | 13 | 14 | 3. Modify the yml file 15 | ``` 16 | caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml 17 | ``` 18 | - The trained Faster R-CNN models will be saved in the '{ROOT_DIR}/output/{EXP_DIR}/{imdb.name}/' folder. 19 | 20 | 21 | 4. Training 22 | ``` 23 | python train_net_multi_gpu.py --gpu_id 0,1 --solver ~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt --iters 80000 --weights ~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel --cfg ~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml --imdb voc_0712_trainval 24 | ``` 25 |
--------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 soeaver Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_air101-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_air101-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 
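# Note (added annotation): rfcn_cls holds R-FCN position-sensitive class score maps;
# its 1029 channels = 21 VOC classes (20 + background) x 49 bins (7x7 group_size).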
2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | 
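# Note (added annotation): for the 81 COCO classes, rfcn_cls above has
# 81 x 49 = 3969 channels; rfcn_bbox has 8 x 49 = 392 channels
# (class-agnostic regression: 2 classes x 4 box coordinates).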
#======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | 
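    # Note (added annotation): spatial_scale = 1/16 maps RoI image coordinates
    # onto the stride-16 feature map before pooling; output_dim below equals
    # the class count (20 VOC classes + background = 21).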
output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext101-64x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | 
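# Note (added annotation): the localization branch below mirrors the classification
# branch but pools rfcn_bbox with output_dim = 8 (4 box offsets x 2 classes),
# matching the class-agnostic setting AGNOSTIC: True in the training cfgs.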
layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/se-inception-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext26-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 
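# Note (added annotation): the global average pooling below collapses each RoI's
# 7x7 position-sensitive map into a single vote, producing the final bbox_pred
# regression vector.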
59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /seg/pspnet/tools/train_net_multi.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from multiprocessing import Process 3 | 4 | # sys.setrecursionlimit(100000) 5 | 6 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python') 7 | # sys.path.append('/home/yanglu/workspace/py-R-FCN-multiGPU-master-0619/caffe/python') 8 | import caffe 9 | 10 | # _snapshot='./aug_single_resnet101_iter_5000.solverstate' 11 | _weights = '/home/prmct/Program/classification/ilsvrc/resnet_v2/resnet101_v2/resnet101_v2_merge_bn_scale.caffemodel' 12 | 13 | solver_prototxt = './solver.prototxt' 14 | gpus = [0,1,2,3] 15 | max_iter = 200000 16 | 17 | def solve(proto, gpus, uid, rank, max_iter): 18 | caffe.set_mode_gpu() 19 | caffe.set_device(gpus[rank]) 20 | caffe.set_solver_count(len(gpus)) 21 | caffe.set_solver_rank(rank) 22 | caffe.set_multiprocess(True) 23 | 24 | solver = caffe.SGDSolver(proto) 25 | if rank == 0: 26 | # solver.restore(_snapshot) 27 | solver.net.copy_from(_weights) 28 | 29 | solver.net.layers[0].get_gpu_id(gpus[rank]) 30 | 31 | nccl = caffe.NCCL(solver, uid) 32 | nccl.bcast() 33 | solver.add_callback(nccl) 34 | 35 | if solver.param.layer_wise_reduce: 36 | solver.net.after_backward(nccl) 37 | 38 | for _ in range(max_iter): 39 | solver.step(1) 40 | 41 | 42 | if __name__ == '__main__': 43 | uid = caffe.NCCL.new_uid() 44 | caffe.init_log() 45 | caffe.log('Using devices %s' % str(gpus)) 46 | procs = [] 47 | 48 | for rank in range(len(gpus)): 49 | p = Process(target=solve, 50 | args=(solver_prototxt, gpus, uid, rank, max_iter)) 51 | p.daemon = False 52 | p.start() 53 | procs.append(p) 54 | for p in procs: 55 | p.join() 56 | 57 | -------------------------------------------------------------------------------- /seg/score_seg.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | gt_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/SegmentationClassAug/' 5 | pre_root = './predict/' 6 | val_pth = './val.txt' 7 | n_class = 21 8 | 9 | 10 | def fast_hist(a, b, n): 11 | k = (a >= 0) & (a < n) 12 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) 13 | 14 | 15 | def compute_hist(val_list): 16 | hist = np.zeros((n_class, n_class)) 17 | for idx in val_list: 18 | print idx 19 | label = cv2.imread(gt_root + idx + '.png', 0) 20 | gt = label.flatten() 21 | tmp = cv2.imread(pre_root + idx + '.png', 0) 22 | 23 | if label.shape != tmp.shape: 24 | pre = cv2.resize(tmp, (label.shape[1], label.shape[0]), interpolation=cv2.cv.CV_INTER_NN) 25 | pre = pre.flatten() 26 | else: 27 | pre = tmp.flatten() 28 | 29 | hist += fast_hist(gt, pre, n_class) 30 | 31 | # return hist[1:, 1:] 32 | return hist 33 | 34 | 35 | def mean_IoU(overall_h): 36 | iu = np.diag(overall_h) / (overall_h.sum(1) + overall_h.sum(0) - np.diag(overall_h)) 37 | return np.nanmean(iu) 38 | 39 | 40 | def per_class_acc(overall_h): 41 | acc = np.diag(overall_h) / overall_h.sum(1) 42 | return np.nanmean(acc) 43 | 44 | 45 | def pixel_wise_acc(overall_h): 46 | return np.diag(overall_h).sum() / 
overall_h.sum() 47 | 48 | 49 | if __name__ == '__main__': 50 | val_list = [] 51 | 52 | f = open(val_pth, 'r') 53 | for i in f: 54 | val_list.append(i.strip().split(' ')[-1].split('/')[-1]) 55 | 56 | hist = compute_hist(val_list) 57 | 58 | print 'Mean IoU:', mean_IoU(hist) 59 | print 'Pixel Acc:', pixel_wise_acc(hist) 60 | print 'Mean Acc:', per_class_acc(hist) 61 | 62 | # print np.diag(hist).sum() / hist.sum() 63 |
--------------------------------------------------------------------------------
/seg/README.md: -------------------------------------------------------------------------------- 1 | ## Object Segmentation 2 | 3 | ### We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 4 | We are releasing the training code and files; the models and more experiments will come soon. 5 | 6 | ### Object Segmentation Performance on PASCAL VOC. 7 | **1. PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).** 8 | 9 | Network|mIoU(%)|pixel acc(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory 10 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 11 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB 12 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB 13 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB 14 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB 15 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB 16 | se-resnet50| 75.80 | 94.30 | -- img/s | --MB | -- img/s | --MB 17 | - To reduce memory usage, we merge all the models' batch-norm layer parameters into the scale layer; for details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 18 | - The PSP module is used without batch normalization; the kernel_size of its average pooling is 64, 32, 16 and 8 respectively; 19 | - All the models use 513x513 input with random crop, multi-scale training (0.75x, 1.0x, 1.25x, 1.5x, 2.0x) and horizontal flipping; 20 | - The training and testing speed is measured on a single NVIDIA Titan Pascal GPU with batch_size=1; 21 | - Training uses batch_size=16 for 20,000 iterations, base_lr=0.001 with the 'poly' learning rate policy (power=0.9); 22 | - Testing uses a single scale, base_size=555 and crop_size=513, no flipping, no CRF; 23 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv_new_1" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | bottom: "conv_new_1" 18 | bottom: "rois" 19 | top: "roi_pool" 20 | name: "roi_pool" 21 | type: "ROIPooling" 22 | roi_pooling_param { 23 | pooled_w: 7 24 | pooled_h: 7 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "fc1" 30 | type: "InnerProduct" 31 | bottom: "roi_pool" 32 | top: "fc1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 2 39 | decay_mult: 0 40 | } 41 | inner_product_param { 42 | num_output: 1024 43 | weight_filler { 44 | type: "gaussian" 45 | std: 0.01 46 | } 47 | bias_filler { 48 | type: "constant" 49 | value: 0 50 | } 51 | } 52 | } 53 | layer { 54 | name: "fc1_relu" 55 | type: "ReLU" 56 | bottom: "fc1" 57 | top: "fc1" 58 | } 59 | layer { 60 | name: "fc2" 61 | type: "InnerProduct" 62 | bottom: "fc1" 63 | top: "fc2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 2 70 | decay_mult: 0 71 | } 72 | inner_product_param { 73 | num_output: 1024 74 | weight_filler { 75 | type: "gaussian" 76 | std: 0.01 77 | } 78 | bias_filler { 79 | type: "constant" 80 | value: 0 81 | } 82 | } 83 | } 84 | layer { 85 | name: "fc2_relu" 86 | type: "ReLU" 87 | bottom: "fc2" 88 | top: "fc2" 89 | } 90 | layer { 91 | name: "cls_score" 92 | type: "InnerProduct" 93 | bottom: "fc2" 94 | top: "cls_score" 95 | param { 96 | lr_mult: 1 97 | decay_mult: 1 98 | } 99 | param { 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | inner_product_param { 104 | num_output: 81 105 | weight_filler { 106 | type: "msra" 107 | std: 0.01 108 | } 109 | bias_filler { 110 | type: "constant" 111 | value: 0 112 | } 113 | } 114 | } 115 | layer { 116 | name: "bbox_pred" 117 | type: "InnerProduct"
118 | bottom: "fc2" 119 | top: "bbox_pred" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | inner_product_param { 129 | num_output: 324 130 | weight_filler { 131 | type: "msra" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | value: 0 137 | } 138 | } 139 | } 140 | layer { 141 | name: "cls_prob" 142 | type: "Softmax" 143 | bottom: "cls_score" 144 | top: "cls_prob" 145 | } 146 | 147 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/airx101-32x4d/rpn_rcnn_deploys/rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv_new_1" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | bottom: "conv_new_1" 18 | bottom: "rois" 19 | top: "roi_pool" 20 | name: "roi_pool" 21 | type: "ROIPooling" 22 | roi_pooling_param { 23 | pooled_w: 7 24 | pooled_h: 7 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "fc1" 30 | type: "InnerProduct" 31 | bottom: "roi_pool" 32 | top: "fc1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 2 39 | decay_mult: 0 40 | } 41 | inner_product_param { 42 | num_output: 1024 43 | weight_filler { 44 | type: "gaussian" 45 | std: 0.01 46 | } 47 | bias_filler { 48 | type: "constant" 49 | value: 0 50 | } 51 | } 52 | } 53 | layer { 54 | name: "fc1_relu" 55 | type: "ReLU" 56 | bottom: "fc1" 57 | top: "fc1" 58 | } 59 | layer { 60 | name: "fc2" 61 | type: "InnerProduct" 62 | bottom: "fc1" 63 | top: "fc2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 2 70 | decay_mult: 0 71 | } 72 | inner_product_param { 73 | num_output: 1024 74 | weight_filler { 75 | type: "gaussian" 76 | std: 0.01 77 | } 78 | bias_filler { 79 | type: "constant" 80 | value: 0 81 | } 82 | } 83 | } 84 | layer { 85 | name: "fc2_relu" 86 | type: "ReLU" 87 | bottom: "fc2" 88 | top: "fc2" 89 | } 90 | layer { 91 | name: "cls_score" 92 | type: "InnerProduct" 93 | bottom: "fc2" 94 | top: "cls_score" 95 | param { 96 | lr_mult: 1 97 | decay_mult: 1 98 | } 99 | param { 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | inner_product_param { 104 | num_output: 21 105 | weight_filler { 106 | type: "msra" 107 | std: 0.01 108 | } 109 | bias_filler { 110 | type: "constant" 111 | value: 0 112 | } 113 | } 114 | } 115 | layer { 116 | name: "bbox_pred" 117 | type: "InnerProduct" 118 | bottom: "fc2" 119 | top: "bbox_pred" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | inner_product_param { 129 | num_output: 84 130 | weight_filler { 131 | type: "msra" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | value: 0 137 | } 138 | } 139 | } 140 | layer { 141 | name: "cls_prob" 142 | type: "Softmax" 143 | bottom: "cls_score" 144 | top: "cls_prob" 145 | } 146 | 147 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt: -------------------------------------------------------------------------------- 1 | input: "p2_elewise" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 
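# Note (added annotation): p2_elewise/p3_elewise/p4_elewise/p5_conv_1x1 are the FPN
# pyramid levels at strides 4/8/16/32 (hence spatial_scale 0.25/0.125/0.0625/0.03125
# below); each RoI is pooled from all four levels and fused by elementwise SUM.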
9 | input: "p3_elewise" 10 | input_shape { 11 | dim: 1 12 | dim: 256 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "p4_elewise" 18 | input_shape { 19 | dim: 1 20 | dim: 256 21 | dim: 40 22 | dim: 40 23 | } 24 | 25 | input: "p5_conv_1x1" 26 | input_shape { 27 | dim: 1 28 | dim: 256 29 | dim: 40 30 | dim: 40 31 | } 32 | 33 | input: "rois" 34 | input_shape { 35 | dim: 300 36 | dim: 5 37 | } 38 | 39 | #============== ROI Pooling =============== 40 | layer { 41 | name: "roi_pool4" 42 | type: "ROIPooling" 43 | bottom: "p2_elewise" 44 | bottom: "rois" 45 | top: "roi_pool4" 46 | roi_pooling_param { 47 | pooled_w: 8 48 | pooled_h: 8 49 | spatial_scale: 0.25 50 | } 51 | } 52 | layer { 53 | name: "roi_pool8" 54 | type: "ROIPooling" 55 | bottom: "p3_elewise" 56 | bottom: "rois" 57 | top: "roi_pool8" 58 | roi_pooling_param { 59 | pooled_w: 8 60 | pooled_h: 8 61 | spatial_scale: 0.125 62 | } 63 | } 64 | layer { 65 | name: "roi_pool16" 66 | type: "ROIPooling" 67 | bottom: "p4_elewise" 68 | bottom: "rois" 69 | top: "roi_pool16" 70 | roi_pooling_param { 71 | pooled_w: 8 72 | pooled_h: 8 73 | spatial_scale: 0.0625 74 | } 75 | } 76 | layer { 77 | name: "roi_pool32" 78 | type: "ROIPooling" 79 | bottom: "p5_conv_1x1" 80 | bottom: "rois" 81 | top: "roi_pool32" 82 | roi_pooling_param { 83 | pooled_w: 8 84 | pooled_h: 8 85 | spatial_scale: 0.03125 86 | } 87 | } 88 | layer { 89 | name: "roi_pool" 90 | type: "Eltwise" 91 | bottom: "roi_pool4" 92 | bottom: "roi_pool8" 93 | bottom: "roi_pool16" 94 | bottom: "roi_pool32" 95 | top: "roi_pool" 96 | eltwise_param { 97 | operation: SUM 98 | } 99 | } 100 | layer { 101 | name: "fc1" 102 | type: "InnerProduct" 103 | bottom: "roi_pool" 104 | top: "fc1" 105 | param { 106 | lr_mult: 1 107 | decay_mult: 1 108 | } 109 | param { 110 | lr_mult: 2 111 | decay_mult: 0 112 | } 113 | inner_product_param { 114 | num_output: 1024 115 | weight_filler { 116 | type: "gaussian" 117 | std: 0.01 118 | } 119 | bias_filler { 120 | type: "constant" 121 | value: 0 122 | } 123 | } 124 | } 125 | layer { 126 | name: "fc1_relu" 127 | type: "ReLU" 128 | bottom: "fc1" 129 | top: "fc1" 130 | } 131 | layer { 132 | name: "fc2" 133 | type: "InnerProduct" 134 | bottom: "fc1" 135 | top: "fc2" 136 | param { 137 | lr_mult: 1 138 | decay_mult: 1 139 | } 140 | param { 141 | lr_mult: 2 142 | decay_mult: 0 143 | } 144 | inner_product_param { 145 | num_output: 1024 146 | weight_filler { 147 | type: "gaussian" 148 | std: 0.01 149 | } 150 | bias_filler { 151 | type: "constant" 152 | value: 0 153 | } 154 | } 155 | } 156 | layer { 157 | name: "fc2_relu" 158 | type: "ReLU" 159 | bottom: "fc2" 160 | top: "fc2" 161 | } 162 | layer { 163 | name: "cls_score" 164 | type: "InnerProduct" 165 | bottom: "fc2" 166 | top: "cls_score" 167 | param { 168 | lr_mult: 1 169 | decay_mult: 1 170 | } 171 | param { 172 | lr_mult: 2 173 | decay_mult: 0 174 | } 175 | inner_product_param { 176 | num_output: 81 177 | weight_filler { 178 | type: "msra" 179 | std: 0.01 180 | } 181 | bias_filler { 182 | type: "constant" 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "bbox_pred" 189 | type: "InnerProduct" 190 | bottom: "fc2" 191 | top: "bbox_pred" 192 | param { 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | lr_mult: 2 198 | decay_mult: 0 199 | } 200 | inner_product_param { 201 | num_output: 324 202 | weight_filler { 203 | type: "msra" 204 | std: 0.01 205 | } 206 | bias_filler { 207 | type: "constant" 208 | value: 0 209 | } 210 | } 211 | } 212 | layer { 213 | name: "cls_prob" 214 | type: "Softmax" 215 | bottom: 
"cls_score" 216 | top: "cls_prob" 217 | } 218 | 219 | 220 | -------------------------------------------------------------------------------- /det/rfcn/tools/train_net_multi_gpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Written by soeaver 5 | # Modified version of py-R-FCN-multiGPU 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network on a region of interest database.""" 9 | 10 | # import _init_paths 11 | import sys 12 | sys.path.append('~/py-RFCN-priv/caffe-priv/python') 13 | sys.path.append('~/py-RFCN-priv/lib') 14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu 15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 16 | from datasets.factory import get_imdb 17 | import datasets.imdb 18 | import caffe 19 | import argparse 20 | import pprint 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument("--gpu_id", type=str, 30 | default='0,1', 31 | help="List of device ids.") 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=80000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='voc_0712_trainval', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | def combined_roidb(imdb_names): 62 | def get_roidb(imdb_name): 63 | imdb = get_imdb(imdb_name) 64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 67 | roidb = get_training_roidb(imdb) 68 | return roidb 69 | 70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 71 | roidb = roidbs[0] 72 | if len(roidbs) > 1: 73 | for r in roidbs[1:]: 74 | roidb.extend(r) 75 | imdb = datasets.imdb.imdb(imdb_names) 76 | else: 77 | imdb = get_imdb(imdb_names) 78 | return imdb, roidb 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | 83 | print('Called with args:') 84 | print(args) 85 | 86 | if args.cfg_file is not None: 87 | cfg_from_file(args.cfg_file) 88 | if args.set_cfgs is not None: 89 | cfg_from_list(args.set_cfgs) 90 | 91 | gpu_id = args.gpu_id 92 | gpu_list = gpu_id.split(',') 93 | gpus = [int(i) for i in gpu_list] 94 | 95 | print('Using config:') 96 | pprint.pprint(cfg) 97 | 98 | 
if not args.randomize: 99 | # fix the random seeds (numpy and caffe) for reproducibility 100 | np.random.seed(cfg.RNG_SEED) 101 | #caffe.set_random_seed(cfg.RNG_SEED) 102 | 103 | # set up caffe 104 | 105 | imdb, roidb = combined_roidb(args.imdb_name) 106 | print '{:d} roidb entries'.format(len(roidb)) 107 | 108 | output_dir = get_output_dir(imdb) 109 | print 'Output will be saved to `{:s}`'.format(output_dir) 110 | 111 | train_net_multi_gpu(args.solver, roidb, output_dir, 112 | pretrained_model=args.pretrained_model, 113 | max_iter=args.max_iters, gpus=gpus) 114 | -------------------------------------------------------------------------------- /det/faster_rcnn/tools/train_net_multi_gpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Written by soeaver 5 | # Modified version of py-R-FCN-multiGPU 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network on a region of interest database.""" 9 | 10 | # import _init_paths 11 | import sys 12 | sys.path.append('~/py-RFCN-priv/caffe-priv/python') 13 | sys.path.append('~/py-RFCN-priv/lib') 14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu 15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 16 | from datasets.factory import get_imdb 17 | import datasets.imdb 18 | import caffe 19 | import argparse 20 | import pprint 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument("--gpu_id", type=str, 30 | default='0,1', 31 | help="List of device ids.") 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=80000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='voc_0712_trainval', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | def combined_roidb(imdb_names): 62 | def get_roidb(imdb_name): 63 | imdb = get_imdb(imdb_name) 64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 67 | roidb = get_training_roidb(imdb) 68 | return roidb 69 | 70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 71 | roidb = roidbs[0] 72 | if len(roidbs) > 1: 73 | for r in roidbs[1:]: 74 | roidb.extend(r) 75 | imdb = 
datasets.imdb.imdb(imdb_names) 76 | else: 77 | imdb = get_imdb(imdb_names) 78 | return imdb, roidb 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | 83 | print('Called with args:') 84 | print(args) 85 | 86 | if args.cfg_file is not None: 87 | cfg_from_file(args.cfg_file) 88 | if args.set_cfgs is not None: 89 | cfg_from_list(args.set_cfgs) 90 | 91 | gpu_id = args.gpu_id 92 | gpu_list = gpu_id.split(',') 93 | gpus = [int(i) for i in gpu_list] 94 | 95 | print('Using config:') 96 | pprint.pprint(cfg) 97 | 98 | if not args.randomize: 99 | # fix the random seeds (numpy and caffe) for reproducibility 100 | np.random.seed(cfg.RNG_SEED) 101 | #caffe.set_random_seed(cfg.RNG_SEED) 102 | 103 | # set up caffe 104 | 105 | imdb, roidb = combined_roidb(args.imdb_name) 106 | print '{:d} roidb entries'.format(len(roidb)) 107 | 108 | output_dir = get_output_dir(imdb) 109 | print 'Output will be saved to `{:s}`'.format(output_dir) 110 | 111 | train_net_multi_gpu(args.solver, roidb, output_dir, 112 | pretrained_model=args.pretrained_model, 113 | max_iter=args.max_iters, gpus=gpus) 114 | -------------------------------------------------------------------------------- /det/MSCOCO_Benchmark.md: -------------------------------------------------------------------------------- 1 | ## MSCOCO Detection Benchmark 2 | 3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)** 4 | 5 | ### **1. Results training on MSCOCO2017-trainval and testing on test-dev2017.** 6 | 7 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 8 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 9 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4 10 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 36.8 | 59.8 | 38.7 | 19.7 | 39.8 | 49.1 11 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1 12 | **FPN-Faster-inception-v4**
with ms-train | 36.5 | 58.5 | 38.8 | 16.5 | 38.8 | 52.1 13 | **FPN-Faster-inception-v4**
with ms-train & bbox-voting & soft-nms | 38.3 | 61.0 | 40.8 | 20.0 | 41.5 | 51.4 14 | **FPN-Faster-inception-v4**
with ms-train & bbox-voting & soft-nms & flipping & ms-test | 39.5 | 62.5 | 42.3 | 23.3 | 43.2 | 51.0 15 | **RFCN-air101**
with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0 16 | **RFCN-air101**
with extra-7-epochs & ms-train & ohem & multigrid | 38.5 | 60.2 | 41.4 | 18.3 | 42.1 | 53.4 17 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping | 40.4 | 63.5 | 43.5 | 22.6 | 44.4 | 52.0 18 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 41.8 | 65.3 | 45.3 | 26.1 | 45.6 | 52.4 19 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & assign-ms-test | 42.1 | 64.6 | 45.6 | 25.6 | 44.5 | 54.1 20 | **RFCN-air101**
with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 43.2 | 66.0 | 46.7 | 25.6 | 46.3 | 55.9 21 | **Faster-2fc-air101**
with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5 22 | 23 | - All the models are test on a single scale (600*1000) without any bells and whistles; 24 | 25 | 26 | ### **2. Context Pyramid Attention Network (CPANet) results training on MSCOCO2017-trainval and testing on test-dev2017.** 27 | 28 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 29 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 30 | **CPANet-air101**
with ms-train & ohem & multigrid & 600-scale-test | 40.1 | 62.2 | 43.4 | 19.4 | 44.4 | 55.9 31 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6 32 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6 33 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6 34 | 35 | 36 | ### **3. COCOPerson results training on MSCOCO2017-trainval and testing on test-dev2017.** 37 | 38 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L|mAR@10 39 | :---:|:---:|:---:|:---:|:---:|:---:|:---:|:---: 40 | **RFCN-se-air14-thin-specific**
with ms-train & ohem & multigrid | 21.5 | 48.9 | 16.5 | 12.3 | 27.3 | 30.8 | 28.6 41 | **RFCN-resnet18-specific**
with ms-train & ohem & multigrid | 38.5 | 66.1 | 39.8 | 16.8 | 47.1 | 63.0 | 41.9 42 | **RFCN-se-resnet50-specific**
with 800-scale-train & ohem & multigrid | 39.0 | 64.1 | 41.1 | 13.5 | 48.4 | 66.4 | 43.9 43 | **RFCN-se-resnet50-specific**
with ms-train & ohem & multigrid | 41.9 | 67.7 | 44.3 | 18.6 | 51.0 | 67.9 | 46.0 44 | **RFCN-se-resnet50-specific**
with ms-train & ohem & multigrid & snms & flip & ms-test | 44.6 | 72.8 | 47.3 | 25.3 | 54.4 | 63.3 | 49.8 45 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 42.7 | 72.0 | 44.5 | 21.0 | 51.1 | 66.4 | 45.4 46 | **RFCN-se-inception-v2-specific**
with ms-train & ohem & multigrid | 41.2 | 66.7 | 43.2 | 17.6 | 50.0 | 68.3 | 45.1 47 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid | 42.3 | 71.4 | 44.2 | 19.5 | 50.7 | 67.2 | 44.9 48 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 48.0 | 79.5 | 50.0 | 28.3 | 55.8 | 67.5 | 50.8 49 | **RFCN-air101**
with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 54.0 | 83.9 | 58.2 | 35.2 | 61.6 | 73.0 | 55.1 50 | **CPANet-air101**
with ms-train & ohem & multigrid & 600-scale-test | 47.7 | 76.4 | 51.1 | 25.3 | 56.8 | 70.6 | 50.2 51 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 53.4 | 82.7 | 58.0 | 33.1 | 61.8 | 73.3 | 55.0 52 | 53 | -------------------------------------------------------------------------------- /cls/cls_lite/README.md: -------------------------------------------------------------------------------- 1 | ## CLS Lite (Classification lite) 2 | 3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 4 | 5 | 6 | ### Performance of lite models on imagenet validation. 7 | **1. Top-1/5 error and forward/backward (F/B) speed on GPU/CPU of lite models in this repository.** 8 | 9 | Network|Top-1/5 error|F/B on GPU|F/B on CPU|Source 10 | :---:|:---:|:---:|:---:|:---: 11 | resnet10-1x32d | 44.78/21.42 | 2.19/2.57ms | 42.84/38.00ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 12 | resnet10-1x48d | -- | 2.55/3.01ms | 83.66/75.97ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 13 | resnet10-1x64d | 35.93/14.59 | 2.93/3.86ms | 134.3/124.8ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 14 | resnet10-1x96d | 30.66/11.13 | 3.42/5.57ms | 220.7/204.9ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 15 | resnet18-1x16d | 51.37/26.35 | 3.03/3.22ms | 25.03/22.63ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 16 | resnet18-1x32d | 38.24/16.02 | 3.53/4.14ms | 69.2/63.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 17 | resnet18-1x48d | 32.55/11.87 | 4.30/4.83ms | 139.1/127.6ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 18 | resnet18-1x64d<br/>
(resnet18-priv) | 29.62/10.38 | 4.48/5.07ms | 213.2/193.3ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 19 | resnet18-1x96d | 26.11/8.31 | 6.16/9.94ms | 443.2/419.0ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 20 | resnet18-1x128d | 24.81/7.61 | 9.75/16.94ms | 729.1/695.4ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 21 | resnext26-32x4d | 25.57/8.12 | 9.68/11.16ms | 331.4/300.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 22 | vgg13-pytorch | 31.07/11.13 | 5.70/9.35ms | 1318/1279ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 23 | vgg13bn-pytorch | 29.50/10.18 | 8.35/13.49ms | 1443/1336ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 24 | vgg16-pytorch | 29.14/10.00 | 6.79/11.78ms | 1684/1643ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 25 | vgg16-tf | 29.03/10.12 | 13.04/48.90ms | 1787/1647ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 26 | vgg16-dsd | 27.62/9.02 | 6.81/11.80ms | 1753/1660ms | [dsd](https://github.com/songhan/DSD) 27 | vgg16-5x | 31.67/11.60 | 4.46/7.15ms | 580.5/593.0ms | [channel-pruning](https://github.com/yihui-he/channel-pruning) 28 | vgg16-3c4x | 28.79/9.78 | 7.53/9.77ms | 753.4/772.4ms | [channel-pruning](https://github.com/yihui-he/channel-pruning) 29 | vgg16bn-pytorch | 27.53/8.99 | 9.14/15.83ms | 1783/1695ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 30 | vgg19-pytorch | 28.23/9.60 | 8.03/14.26ms | 2076/2012ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 31 | vgg19bn-pytorch | 26.58/8.45 | 10.75/18.77ms | 2224/2081ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 32 | inception-v1-tf | 31.37/11.10 | 10.66/7.84ms | 186.2/155.8ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 33 | inception-v2-tf | 27.91/9.40 | 13.93/10.65ms | 286.4/255.0ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 34 | xception | 20.90/5.49 | 15.21/31.65ms | 1262/1253ms | [keras-models](https://github.com/fchollet/deep-learning-models) 35 | mobilenet-v1-1.0 | 29.98/10.52 | 6.16/9.50ms | 169.4/138.1ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 36 | air14-1x8d | 56.28/31.25 | 4.28/3.08ms | 21.01/3.29ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 37 | air14-1x16d | 44.23/20.68 | 5.13/3.56ms | 45.45/6.41ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 38 | air26-1x16d | 36.31/14.59 | 7.32/4.70ms | 62.02/8.52ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 39 | air26-1x32d | 28.71/9.59 | 8.77/5.05ms | 170.7/19.25ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 40 | air50-1x16d | 31.19/11.26 | 14.73/8.31ms | 91.65/16.06ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 41 | air50-1x32d | 25.59/7.89 | 15.39/7.64ms | 229.6/22.81ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 42 | dpn68 | 22.56/6.24 | 22.70/21.41ms | 371.1/329.3ms | [DPNs](https://github.com/cypw/DPNs) 43 | se-resnet50 | 22.39/6.37 | 17.91/19.49ms | 932.2/821.4ms | [senet](https://github.com/hujie-frank/SENet) 44 | se-resnet50-hik | 21.98/5.80 | 17.43/20.13ms | 581.1/482.7ms | [senet-caffe](https://github.com/shicai/SENet-Caffe) 45 | se-inception-v2 | 23.64/7.04 | 15.31/11.21ms | 251.9/218.5ms | 
[senet](https://github.com/hujie-frank/SENet) 46 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/2007test400.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | 000002 3 | 000003 4 | 000004 5 | 000006 6 | 000008 7 | 000010 8 | 000011 9 | 000013 10 | 000014 11 | 000015 12 | 000018 13 | 000022 14 | 000025 15 | 000027 16 | 000028 17 | 000029 18 | 000031 19 | 000037 20 | 000038 21 | 000040 22 | 000043 23 | 000045 24 | 000049 25 | 000053 26 | 000054 27 | 000055 28 | 000056 29 | 000057 30 | 000058 31 | 000059 32 | 000062 33 | 000067 34 | 000068 35 | 000069 36 | 000070 37 | 000071 38 | 000074 39 | 000075 40 | 000076 41 | 000079 42 | 000080 43 | 000082 44 | 000084 45 | 000085 46 | 000086 47 | 000087 48 | 000088 49 | 000090 50 | 000092 51 | 000094 52 | 000096 53 | 000097 54 | 000098 55 | 000100 56 | 000103 57 | 000105 58 | 000106 59 | 000108 60 | 000111 61 | 000114 62 | 000115 63 | 000116 64 | 000119 65 | 000124 66 | 000126 67 | 000127 68 | 000128 69 | 000135 70 | 000136 71 | 000137 72 | 000139 73 | 000144 74 | 000145 75 | 000148 76 | 000149 77 | 000151 78 | 000152 79 | 000155 80 | 000157 81 | 000160 82 | 000166 83 | 000167 84 | 000168 85 | 000172 86 | 000175 87 | 000176 88 | 000178 89 | 000179 90 | 000181 91 | 000182 92 | 000183 93 | 000185 94 | 000186 95 | 000188 96 | 000191 97 | 000195 98 | 000196 99 | 000197 100 | 000199 101 | 000201 102 | 000202 103 | 000204 104 | 000205 105 | 000206 106 | 000212 107 | 000213 108 | 000216 109 | 000217 110 | 000223 111 | 000226 112 | 000227 113 | 000230 114 | 000231 115 | 000234 116 | 000237 117 | 000238 118 | 000239 119 | 000240 120 | 000243 121 | 000247 122 | 000248 123 | 000252 124 | 000253 125 | 000254 126 | 000255 127 | 000258 128 | 000260 129 | 000261 130 | 000264 131 | 000265 132 | 000267 133 | 000271 134 | 000272 135 | 000273 136 | 000274 137 | 000277 138 | 000279 139 | 000280 140 | 000281 141 | 000283 142 | 000284 143 | 000286 144 | 000287 145 | 000290 146 | 000291 147 | 000292 148 | 000293 149 | 000295 150 | 000297 151 | 000299 152 | 000300 153 | 000301 154 | 000309 155 | 000310 156 | 000313 157 | 000314 158 | 000315 159 | 000316 160 | 000319 161 | 000324 162 | 000326 163 | 000327 164 | 000330 165 | 000333 166 | 000335 167 | 000339 168 | 000341 169 | 000342 170 | 000345 171 | 000346 172 | 000348 173 | 000350 174 | 000351 175 | 000353 176 | 000356 177 | 000357 178 | 000358 179 | 000360 180 | 000361 181 | 000362 182 | 000364 183 | 000365 184 | 000366 185 | 000368 186 | 000369 187 | 000371 188 | 000375 189 | 000376 190 | 000377 191 | 000378 192 | 000383 193 | 000384 194 | 000385 195 | 000386 196 | 000388 197 | 000389 198 | 000390 199 | 000392 200 | 000393 201 | 000397 202 | 000398 203 | 000399 204 | 000401 205 | 000402 206 | 000405 207 | 000409 208 | 000410 209 | 000412 210 | 000413 211 | 000414 212 | 000415 213 | 000418 214 | 000421 215 | 000422 216 | 000423 217 | 000425 218 | 000426 219 | 000429 220 | 000432 221 | 000434 222 | 000436 223 | 000437 224 | 000440 225 | 000441 226 | 000442 227 | 000444 228 | 000445 229 | 000447 230 | 000449 231 | 000451 232 | 000452 233 | 000453 234 | 000455 235 | 000456 236 | 000457 237 | 000458 238 | 000465 239 | 000466 240 | 000467 241 | 000471 242 | 000472 243 | 000473 244 | 000475 245 | 000478 246 | 000479 247 | 000481 248 | 000485 249 | 000487 250 | 000488 251 | 000490 252 | 000493 253 | 000495 254 | 000497 255 | 000502 256 | 000504 257 | 000505 258 | 000506 259 | 000507 260 | 000510 261 | 000511 262 
| 000512 263 | 000517 264 | 000521 265 | 000527 266 | 000529 267 | 000532 268 | 000533 269 | 000534 270 | 000536 271 | 000538 272 | 000539 273 | 000542 274 | 000546 275 | 000547 276 | 000548 277 | 000551 278 | 000553 279 | 000556 280 | 000557 281 | 000558 282 | 000560 283 | 000561 284 | 000562 285 | 000566 286 | 000567 287 | 000568 288 | 000569 289 | 000570 290 | 000571 291 | 000572 292 | 000573 293 | 000574 294 | 000575 295 | 000576 296 | 000578 297 | 000580 298 | 000584 299 | 000585 300 | 000586 301 | 000587 302 | 000593 303 | 000594 304 | 000595 305 | 000596 306 | 000600 307 | 000602 308 | 000603 309 | 000604 310 | 000606 311 | 000607 312 | 000611 313 | 000614 314 | 000615 315 | 000616 316 | 000617 317 | 000618 318 | 000621 319 | 000623 320 | 000624 321 | 000627 322 | 000629 323 | 000630 324 | 000631 325 | 000634 326 | 000636 327 | 000638 328 | 000639 329 | 000640 330 | 000641 331 | 000642 332 | 000643 333 | 000644 334 | 000646 335 | 000649 336 | 000650 337 | 000651 338 | 000652 339 | 000655 340 | 000658 341 | 000659 342 | 000662 343 | 000664 344 | 000665 345 | 000666 346 | 000668 347 | 000669 348 | 000670 349 | 000673 350 | 000674 351 | 000678 352 | 000679 353 | 000681 354 | 000683 355 | 000687 356 | 000691 357 | 000692 358 | 000693 359 | 000696 360 | 000697 361 | 000698 362 | 000701 363 | 000703 364 | 000704 365 | 000706 366 | 000708 367 | 000715 368 | 000716 369 | 000718 370 | 000719 371 | 000721 372 | 000722 373 | 000723 374 | 000724 375 | 000725 376 | 000727 377 | 000732 378 | 000734 379 | 000735 380 | 000736 381 | 000737 382 | 000741 383 | 000743 384 | 000744 385 | 000745 386 | 000747 387 | 000749 388 | 000751 389 | 000757 390 | 000758 391 | 000759 392 | 000762 393 | 000765 394 | 000766 395 | 000769 396 | 000773 397 | 000775 398 | 000778 399 | 000779 400 | 000781 -------------------------------------------------------------------------------- /det/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Object Detection 3 | 4 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 5 | we are releasing the training code and files, the models and more experiments will come soon.** 6 | 7 | ### Object Detection Performance on PASCAL VOC. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/VOC_Benchmark.md)) 8 | 9 | #### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 10 | 11 | Network|mAP@50(%)|training
speed|training
memory|testing
speed|testing
memory 12 | :---:|:---:|:---:|:---:|:---:|:---: 13 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 14 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 15 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 16 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 17 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 19 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 20 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB 21 | 22 | - To reduce memory usage, we merge each model's batchnorm layer parameters into the following scale layer (see the sketch after this list); for more details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 23 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file, so that more testing tricks can be adopted; 24 | - Performance, speed and memory are measured on [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) with an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions; 25 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3; 26 | 27 | 
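A minimal numpy sketch of that batchnorm-to-scale merge (an illustration only, assuming a net in which every BatchNorm layer is immediately followed by a Scale layer, as in the ResNet-style prototxts of this repository; file and layer names below are placeholders):

```Python
import numpy as np
import caffe

# Placeholder file names -- substitute your own deploy/weights.
net = caffe.Net('deploy_with_bn.prototxt', 'weights_with_bn.caffemodel', caffe.TEST)

eps = 1e-5
bn_scale_pairs = [('bn2a_branch1', 'scale2a_branch1')]  # illustrative pair
for bn_name, scale_name in bn_scale_pairs:
    # Caffe's BatchNorm stores running mean, running variance and a moving
    # average factor; the true statistics are blobs[0..1] divided by blobs[2].
    mean, var, factor = [p.data for p in net.params[bn_name]]
    gamma, beta = [p.data for p in net.params[scale_name]]
    scale = 1.0 / factor[0] if factor[0] != 0 else 0.0
    std = np.sqrt(var * scale + eps)
    # Fold the BN statistics into the Scale layer's gamma/beta; the BN layer
    # can then be removed from the merged deploy prototxt.
    net.params[scale_name][0].data[...] = gamma / std
    net.params[scale_name][1].data[...] = beta - gamma * mean * scale / std
net.save('weights_merged.caffemodel')
```

28 | #### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 29 | 30 | Network|mAP@50(%)|training<br/>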
speed|training
memory|testing
speed|testing
memory 31 | :---:|:---:|:---:|:---:|:---:|:---: 32 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB 33 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB 34 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB 35 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB 36 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB 37 | 38 | - 2fc means a detection head of conv256d --- fc1024d --- fc1024d; 39 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 40 | 41 | 42 | #### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 43 | 44 | Network|mAP@50(%)|training<br/>
speed|training
memory|testing
speed|testing
memory 45 | :---:|:---:|:---:|:---:|:---:|:---: 46 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB 47 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB 48 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB 49 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB 50 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB 51 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB 52 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB 53 | 54 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 55 | 56 | 57 | ### Object Detection Performance on MSCOCO. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/MSCOCO_Benchmark.md)) 58 | 59 | #### **1. Results training on MSCOCO2017-trainval and testing on test-dev2017.** 60 | 61 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 62 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 63 | **RFCN-se-inception-v2**<br/>
with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4 64 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1 65 | **RFCN-air101**
with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0 66 | **Faster-2fc-air101**
with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5 67 | 68 | - All the models are tested on a single scale (600*1000) without any bells and whistles; 69 | 70 | 71 | #### **2. Context Pyramid Attention Network (CPANet) results training on MSCOCO2017-trainval and testing on test-dev2017.** 72 | 73 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 74 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 75 | **CPANet-air101**<br/>
with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6 76 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6 77 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6 78 | -------------------------------------------------------------------------------- /det/VOC_Benchmark.md: -------------------------------------------------------------------------------- 1 | ## VOC Detection Benchmark 2 | 3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)** 4 | 5 | ### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 6 | 7 | Network|mAP@50(%)|training
speed|training
memory|testing
speed|testing
memory 8 | :---:|:---:|:---:|:---:|:---:|:---: 9 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 10 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 11 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB 12 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 13 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 14 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 15 | resnext101-64x4d| 80.71 | 2.0 img/s
(batch=96) | 11,277MB | 3.7 img/s | 9,461MB 16 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB 17 | xception| 76.6 | 3.3 img/s | 7,341MB | 7.8 img/s | 2,979MB 18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 19 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 20 | densenet-201| 77.53 | 3.9 img/s
(batch=72) | 10,073MB | 5.5 img/s | 9,955MB 21 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB 22 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB 23 | 24 | - To reduce memory usage, we merge each model's batchnorm layer parameters into the following scale layer; for more details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 25 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file, so that more testing tricks can be adopted; 26 | - Performance, speed and memory are measured on [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) with an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions; 27 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3; 28 | 29 | 30 | **Comparisons on VOC 2007 test using Faster-RCNN with inception-v4.** 31 | 32 | Method|mAP@50| improvement |test speed 33 | :---|:---:|:---:|:---: 34 | baseline inception-v4 | 81.49 | -- | 5.4 img/s 35 |  +multi-scale training | 83.79 | 2.30 | 5.4 img/s 36 |  +box voting | 83.95 | 0.16 | 5.4 img/s 37 |  +nms=0.4 | 84.22 | 0.27 | 5.4 img/s 38 |  +image flipping test | 84.54 | 0.32 | 2.7 img/s 39 |  +multi-scale testing | 85.78 | 1.24 | 0.13 img/s 40 | 41 | - The SCALES for multi-scale training are (200, 400, 600, 800, 1000) and MAX_SIZE is 1666 (see the cfg sketch after this list); 42 | - For multi-scale training, we double the training iterations (160,000 for VOC 0712 trainval); 43 | - The SCALES for multi-scale testing are (400, 600, 800, 1000, 1200) and MAX_SIZE is 2000; 44 | 
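A minimal sketch of the corresponding config overrides, assuming a py-faster-rcnn-style `fast_rcnn/config.py` (as used by py-RFCN-priv); the same keys can equally be set in the experiment's .yml file:

```Python
from fast_rcnn.config import cfg_from_list

# Values mirror the bullets above; TRAIN.SCALES / TEST.SCALES and MAX_SIZE
# are the standard cfg keys of py-faster-rcnn-style code bases.
cfg_from_list(['TRAIN.SCALES', '(200, 400, 600, 800, 1000)',
               'TRAIN.MAX_SIZE', '1666',
               'TEST.SCALES', '(400, 600, 800, 1000, 1200)',
               'TEST.MAX_SIZE', '2000'])
```

45 | ### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 46 | 47 | Network|mAP@50(%)|training<br/>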
speed|training
memory|testing
speed|testing
memory 48 | :---:|:---:|:---:|:---:|:---:|:---: 49 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB 50 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB 51 | resnet101-v2 w/o OHEM | 80.18 | 5.4 img/s | 5,807MB | 10.5 img/s | 3,147MB 52 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB 53 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB 54 | air101-context | (82.09) | 3.3 img/s | 6,773MB | 8.6 img/s | 4,577MB 55 | air101-fpn w/o OHEM | 81.44 | 2.4 img/s | 7,063MB | 3.8 img/s | 4,433MB 56 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB 57 | 58 | - 2fc means a detection head of conv256d --- fc1024d --- fc1024d; 59 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 60 | 61 | 62 | ### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 63 | 64 | Network|mAP@50(%)|training<br/>
speed|training
memory|testing
speed|testing
memory 65 | :---:|:---:|:---:|:---:|:---:|:---: 66 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB 67 | resnext26-32x4d| 72.07 | 7.5 img/s | 2,521MB | 15.0 img/s | 1,797MB 68 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB 69 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB 70 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB 71 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB 72 | resnext101-64x4d| 80.26(79.88) | 2.4 img/s | 10,203MB | 6.2 img/s | 8,529MB 73 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB 74 | air152| (81.18) | 2.6 img/s | 9,331MB | 6.7 img/s | 6,151MB 75 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB 76 | inception-v4-3x3 | 81.15 | 3.7 img/s | 5,207MB | 9.5 img/s | 3,151MB 77 | 78 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 79 | 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Caffe-model 2 | Caffe models (including classification, detection and segmentation) and deploy prototxt for resnet, resnext, inception_v3, inception_v4, inception_resnet, wider_resnet, densenet, aligned-inception-resne(x)t, DPNs and other networks. 3 | 4 | Clone the caffe-model repository: 5 | ```Shell 6 | git clone https://github.com/soeaver/caffe-model --recursive 7 | ``` 8 | 9 | ## We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 10 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 11 | 12 | ## Disclaimer 13 | 14 | Most of the pre-trained models are converted from other projects; the main credit belongs to the original authors. 15 | 16 | Project links: 17 | 18 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [Senet](https://github.com/hujie-frank/SENet) 19 | 20 | 21 | ## CLS (Classification, more details are in [cls](https://github.com/soeaver/caffe-model/tree/master/cls)) 22 | ### Performance on imagenet validation. 23 | **Top-1/5 error of pre-trained models in this repository (download [urls](https://github.com/soeaver/caffe-model/tree/master/cls#performance-on-imagenet-validation)).** 24 | 25 | Network|224/299<br/>
(single-crop)|224/299
(12-crop)|320/395
(single-crop)|320/395
(12-crop) 26 | :---:|:---:|:---:|:---:|:---: 27 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57 28 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32 29 | resnet269-v2| 19.71/5.00 | **18.25**/4.20 | 18.70/4.33 | **17.87**/3.85 30 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24 31 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39 32 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 |18.12/3.92 33 | inception-resnet-v2| 19.86/**4.83** | 18.46/**4.08** | 18.75/**4.02** | 18.15/**3.71** 34 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03 35 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59 36 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31 37 | wrn50-2
(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87 38 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43 39 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24 40 | dpn-107| 19.70/5.06 | ../.. | 18.41/4.25 | ../.. 41 | 42 | 43 | ## DET (Detection, more details are in [det](https://github.com/soeaver/caffe-model/tree/master/det)) 44 | ### Object Detection Performance on PASCAL VOC. 45 | **Original Faster-RCNN trained on VOC 2007+2012 trainval and tested on VOC 2007 test.** 46 | 47 | Network|mAP@50|train speed|train memory|test speed|test memory 48 | :---:|:---:|:---:|:---:|:---:|:---: 49 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 50 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 51 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB 52 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 53 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 54 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 55 | resnext101-64x4d| 80.71 | 2.0 img/s<br/>
(batch=96) | 11,277MB | 3.7 img/s | 9,461MB 56 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB 57 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 58 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 59 | densenet-201| 77.53 | 3.9 img/s
(batch=72) | 10,073MB | 5.5 img/s | 9,955MB 60 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB 61 | 62 | 63 | ## SEG (Segmentation, more details are in [seg](https://github.com/soeaver/caffe-model/tree/master/seg)) 64 | ### Object Segmentation Performance on PASCAL VOC. 65 | **PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).** 66 | 67 | Network|mIoU(%)|pixel acc(%)|training
speed|training
memory|testing
speed|testing
memory 68 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 69 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB 70 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB 71 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB 72 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB 73 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB 74 | 75 | 76 | ## License 77 | 78 | caffe-model is released under the MIT License (refer to the LICENSE file for details). 79 | 80 | 81 | ## Acknowledgement 82 | 83 | I greatly thank [Yangqing Jia](https://github.com/Yangqing) and [BVLC group](https://www.github.com/BVLC/caffe) for developing Caffe. 84 | 85 | And I would like to thank all the authors of every network. 86 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res4b" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res4b" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | bottom: "roi_pool" 30 | top: "res5a_branch1" 31 | name: "res5a_branch1" 32 | type: "Convolution" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | num_output: 512 39 | kernel_size: 1 40 | pad: 0 41 | stride: 2 42 | bias_term: false 43 | } 44 | } 45 | layer { 46 | bottom: "res5a_branch1" 47 | top: "res5a_branch1" 48 | name: "scale5a_branch1" 49 | type: "Scale" 50 | scale_param { 51 | bias_term: true 52 | } 53 | param { 54 | lr_mult: 0.0 55 | decay_mult: 0.0 56 | } 57 | param { 58 | lr_mult: 0.0 59 | decay_mult: 0.0 60 | } 61 | } 62 | layer { 63 | bottom: "roi_pool" 64 | top: "res5a_branch2a" 65 | name: "res5a_branch2a" 66 | type: "Convolution" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | convolution_param { 72 | num_output: 512 73 | kernel_size: 3 74 | pad: 1 75 | stride: 2 76 | bias_term: false 77 | } 78 | } 79 | layer { 80 | bottom: "res5a_branch2a" 81 | top: "res5a_branch2a" 82 | name: "scale5a_branch2a" 83 | type: "Scale" 84 | scale_param { 85 | bias_term: true 86 | } 87 | param { 88 | lr_mult: 0.0 89 | decay_mult: 0.0 90 | } 91 | param { 92 | lr_mult: 0.0 93 | decay_mult: 0.0 94 | } 95 | } 96 | layer { 97 | bottom: "res5a_branch2a" 98 | top: "res5a_branch2a" 99 | name: "res5a_branch2a_relu" 100 | type: "ReLU" 101 | } 102 | layer { 103 | bottom: "res5a_branch2a" 104 | top: "res5a_branch2b" 105 | name: "res5a_branch2b" 106 | type: "Convolution" 107 | param { 108 | lr_mult: 1 109 | decay_mult: 1 110 | } 111 | convolution_param { 112 | num_output: 512 113 | kernel_size: 3 114 | pad: 1 115 | stride: 1 116 | bias_term: false 117 | } 118 | } 119 | layer { 120 | bottom: "res5a_branch2b" 121 | top: "res5a_branch2b" 122 | name: "scale5a_branch2b" 123 | type: "Scale" 124 | scale_param { 125 | bias_term: true 126 | } 127 | param { 128 | lr_mult: 0.0 129 | decay_mult: 0.0 130 | } 131 | param { 132 | lr_mult: 0.0 133 | decay_mult: 0.0 134 | } 135 | } 136 | layer { 137 | bottom: "res5a_branch1" 138 | bottom: "res5a_branch2b" 139 | top: 
"res5a" 140 | name: "res5a" 141 | type: "Eltwise" 142 | } 143 | layer { 144 | bottom: "res5a" 145 | top: "res5a" 146 | name: "res5a_relu" 147 | type: "ReLU" 148 | } 149 | layer { 150 | bottom: "res5a" 151 | top: "res5b_branch2a" 152 | name: "res5b_branch2a" 153 | type: "Convolution" 154 | param { 155 | lr_mult: 1 156 | decay_mult: 1 157 | } 158 | convolution_param { 159 | num_output: 512 160 | kernel_size: 3 161 | pad: 1 162 | stride: 1 163 | bias_term: false 164 | } 165 | } 166 | layer { 167 | bottom: "res5b_branch2a" 168 | top: "res5b_branch2a" 169 | name: "scale5b_branch2a" 170 | type: "Scale" 171 | scale_param { 172 | bias_term: true 173 | } 174 | param { 175 | lr_mult: 0.0 176 | decay_mult: 0.0 177 | } 178 | param { 179 | lr_mult: 0.0 180 | decay_mult: 0.0 181 | } 182 | } 183 | layer { 184 | bottom: "res5b_branch2a" 185 | top: "res5b_branch2a" 186 | name: "res5b_branch2a_relu" 187 | type: "ReLU" 188 | } 189 | layer { 190 | bottom: "res5b_branch2a" 191 | top: "res5b_branch2b" 192 | name: "res5b_branch2b" 193 | type: "Convolution" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | convolution_param { 199 | num_output: 512 200 | kernel_size: 3 201 | pad: 1 202 | stride: 1 203 | bias_term: false 204 | } 205 | } 206 | layer { 207 | bottom: "res5b_branch2b" 208 | top: "res5b_branch2b" 209 | name: "scale5b_branch2b" 210 | type: "Scale" 211 | scale_param { 212 | bias_term: true 213 | } 214 | param { 215 | lr_mult: 0.0 216 | decay_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | decay_mult: 0.0 221 | } 222 | } 223 | layer { 224 | bottom: "res5a" 225 | bottom: "res5b_branch2b" 226 | top: "res5b" 227 | name: "res5b" 228 | type: "Eltwise" 229 | } 230 | layer { 231 | bottom: "res5b" 232 | top: "res5b" 233 | name: "res5b_relu" 234 | type: "ReLU" 235 | } 236 | layer { 237 | bottom: "res5b" 238 | top: "pool5" 239 | name: "pool5" 240 | type: "Pooling" 241 | pooling_param { 242 | global_pooling: true 243 | pool: AVE 244 | } 245 | } 246 | layer { 247 | name: "cls_score" 248 | type: "InnerProduct" 249 | bottom: "pool5" 250 | top: "cls_score" 251 | param { 252 | lr_mult: 1 253 | decay_mult: 1 254 | } 255 | param { 256 | lr_mult: 2 257 | decay_mult: 0 258 | } 259 | inner_product_param { 260 | num_output: 21 261 | weight_filler { 262 | type: "msra" 263 | std: 0.01 264 | } 265 | bias_filler { 266 | type: "constant" 267 | value: 0 268 | } 269 | } 270 | } 271 | layer { 272 | name: "bbox_pred" 273 | type: "InnerProduct" 274 | bottom: "pool5" 275 | top: "bbox_pred" 276 | param { 277 | lr_mult: 1 278 | decay_mult: 1 279 | } 280 | param { 281 | lr_mult: 2 282 | decay_mult: 0 283 | } 284 | inner_product_param { 285 | num_output: 84 286 | weight_filler { 287 | type: "msra" 288 | std: 0.01 289 | } 290 | bias_filler { 291 | type: "constant" 292 | value: 0 293 | } 294 | } 295 | } 296 | layer { 297 | name: "cls_prob" 298 | type: "Softmax" 299 | bottom: "cls_score" 300 | top: "cls_prob" 301 | } 302 | 303 | 304 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg13-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: 
"conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "pool3" 161 | type: "Pooling" 162 | bottom: "conv3_2" 163 | top: "pool3" 164 | pooling_param { 165 | pool: MAX 166 | kernel_size: 2 167 | stride: 2 168 | } 169 | } 170 | 171 | layer { 172 | name: "conv4_1" 173 | type: "Convolution" 174 | bottom: "pool3" 175 | top: "conv4_1" 176 | convolution_param { 177 | bias_term: true 178 | num_output: 512 179 | pad: 1 180 | kernel_size: 3 181 | stride: 1 182 | } 183 | } 184 | 185 | layer { 186 | name: "relu4_1" 187 | type: "ReLU" 188 | bottom: "conv4_1" 189 | top: "conv4_1" 190 | } 191 | 192 | layer { 193 | name: "conv4_2" 194 | type: "Convolution" 195 | bottom: "conv4_1" 196 | top: "conv4_2" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_2" 208 | type: "ReLU" 209 | bottom: "conv4_2" 210 | top: "conv4_2" 211 | } 212 | 213 | layer { 214 | name: "pool4" 215 | type: "Pooling" 216 | bottom: "conv4_2" 217 | top: "pool4" 218 | pooling_param { 219 | pool: MAX 220 | kernel_size: 2 221 | stride: 2 222 | } 223 | } 224 | 225 | layer { 226 | name: "conv5_1" 227 | type: "Convolution" 228 | bottom: "pool4" 229 | top: "conv5_1" 230 | convolution_param { 231 | bias_term: true 232 | num_output: 512 233 | pad: 1 234 | kernel_size: 3 235 | stride: 1 236 | } 237 | } 238 | 239 | layer { 240 
| name: "relu5_1" 241 | type: "ReLU" 242 | bottom: "conv5_1" 243 | top: "conv5_1" 244 | } 245 | 246 | layer { 247 | name: "conv5_2" 248 | type: "Convolution" 249 | bottom: "conv5_1" 250 | top: "conv5_2" 251 | convolution_param { 252 | bias_term: true 253 | num_output: 512 254 | pad: 1 255 | kernel_size: 3 256 | stride: 1 257 | } 258 | } 259 | 260 | layer { 261 | name: "relu5_2" 262 | type: "ReLU" 263 | bottom: "conv5_2" 264 | top: "conv5_2" 265 | } 266 | 267 | layer { 268 | name: "pool5" 269 | type: "Pooling" 270 | bottom: "conv5_2" 271 | top: "pool5" 272 | pooling_param { 273 | pool: MAX 274 | kernel_size: 2 275 | stride: 2 276 | } 277 | } 278 | 279 | layer { 280 | bottom: "pool5" 281 | top: "fc6" 282 | name: "fc6" 283 | type: "InnerProduct" 284 | inner_product_param { 285 | num_output: 4096 286 | } 287 | } 288 | 289 | layer { 290 | name: "relu6" 291 | type: "ReLU" 292 | bottom: "fc6" 293 | top: "fc6" 294 | } 295 | 296 | layer { 297 | name: "dropout6" 298 | type: "Dropout" 299 | bottom: "fc6" 300 | top: "fc6" 301 | dropout_param { 302 | dropout_ratio: 0.5 303 | } 304 | } 305 | 306 | layer { 307 | bottom: "fc6" 308 | top: "fc7" 309 | name: "fc7" 310 | type: "InnerProduct" 311 | inner_product_param { 312 | num_output: 4096 313 | } 314 | } 315 | 316 | layer { 317 | name: "relu7" 318 | type: "ReLU" 319 | bottom: "fc7" 320 | top: "fc7" 321 | } 322 | 323 | layer { 324 | name: "dropout7" 325 | type: "Dropout" 326 | bottom: "fc7" 327 | top: "fc7" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | 333 | layer { 334 | bottom: "fc7" 335 | top: "classifier" 336 | name: "classifier" 337 | type: "InnerProduct" 338 | inner_product_param { 339 | num_output: 1000 340 | } 341 | } 342 | 343 | layer { 344 | name: "prob" 345 | type: "Softmax" 346 | bottom: "classifier" 347 | top: "prob" 348 | } 349 | -------------------------------------------------------------------------------- /seg/evaluation_seg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('/home/prmct/workspace/PSPNet-0120/python/') 4 | 5 | import caffe 6 | import cv2 7 | import numpy as np 8 | import datetime 9 | 10 | gpu_mode = True 11 | gpu_id = 3 12 | data_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/JPEGImages/' 13 | val_file = 'test_205.txt' 14 | save_root = './predict205_40000_ms/' 15 | model_weights = 'psp_resnext101_32x4d_coco_sbd_iter_40000.caffemodel' 16 | model_deploy = 'deploy_psp_resnext101_32x4d_merge_bn_scale.prototxt' 17 | prob_layer = 'prob' # output layer, normally Softmax 18 | class_num = 21 19 | base_size = 512 20 | crop_size = 473 21 | raw_scale = 57.5 # image scale factor, 1.0 or 128.0 22 | # mean_value = np.array([104.008, 116.669, 122.675]) 23 | # mean_value = np.array([128, 128, 128]) 24 | mean_value = np.array([103.52, 116.28, 123.675]) 25 | scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # multi scale 26 | # scale_array = [1.0] # single scale 27 | flip = True 28 | class_offset = 0 29 | crf = False 30 | crf_deploy = '/home/prmct/Program/segmentation/deploy_crf.prototxt' 31 | crf_factor = 4.0 32 | 33 | if gpu_mode: 34 | caffe.set_mode_gpu() 35 | caffe.set_device(gpu_id) 36 | else: 37 | caffe.set_mode_cpu() 38 | net = caffe.Net(model_deploy, model_weights, caffe.TEST) 39 | 40 | if crf: 41 | net_crf = caffe.Net(crf_deploy, caffe.TEST) 42 | 43 | 44 | def eval_batch(): 45 | eval_images = [] 46 | f = open(val_file, 'r') 47 | for i in f: 48 | eval_images.append(i.strip()) 49 | 50 | skip_num = 0 51 | eval_len = len(eval_images) 52 | 
start_time = datetime.datetime.now() 53 | for i in xrange(eval_len - skip_num): 54 | _img = cv2.imread(data_root + eval_images[i + skip_num] + '.jpg') 55 | h, w, d = _img.shape 56 | 57 | score_map = np.zeros((h, w, class_num), dtype=np.float32) 58 | for j in scale_array: 59 | long_size = float(base_size) * j + 1 60 | ratio = long_size / max(h, w) 61 | new_size = (int(w * ratio), int(h * ratio)) 62 | _scale = cv2.resize(_img, new_size) 63 | score_map += cv2.resize(scale_process(_scale), (w, h)) 64 | score_map /= len(scale_array) 65 | 66 | if crf: 67 | tmp_data = np.asarray([_img.transpose(2, 0, 1)], dtype=np.float32) 68 | tmp_score = np.asarray([score_map.transpose(2, 0, 1)], dtype=np.float32) 69 | net_crf.blobs['data'].reshape(*tmp_data.shape) 70 | net_crf.blobs['data'].data[...] = tmp_data / raw_scale 71 | net_crf.blobs['data_dim'].data[...] = [[[h, w]]] 72 | net_crf.blobs['score'].reshape(*tmp_score.shape) 73 | net_crf.blobs['score'].data[...] = tmp_score * crf_factor 74 | net_crf.forward() 75 | score_map = net_crf.blobs[prob_layer].data[0].transpose(1, 2, 0) 76 | 77 | cv2.imwrite(save_root + eval_images[i + skip_num] + '.png', score_map.argmax(2) + class_offset) 78 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len) + ' ' + str(eval_images[i + skip_num]) 79 | end_time = datetime.datetime.now() 80 | print '\nEvaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time)) 81 | print '\n{} images has been tested. \nThe model is: {}'.format(str(eval_len), model_weights) 82 | 83 | 84 | def scale_process(_scale): 85 | sh, sw, sd = _scale.shape 86 | _scale = np.asarray(_scale, dtype=np.float32) 87 | long_size = max(sh, sw) 88 | short_size = min(sh, sw) 89 | if long_size <= crop_size: 90 | input_data = pad_img(_scale - mean_value) 91 | score = caffe_process(input_data)[:sh, :sw, :] 92 | else: 93 | stride_rate = 2.0 / 3 94 | stride = np.ceil(crop_size * stride_rate) 95 | _pad = _scale 96 | if short_size < crop_size: 97 | _pad = pad_img(_scale - mean_value) + mean_value 98 | 99 | ph, pw, pd = _pad.shape 100 | h_grid = int(np.ceil(float(ph - crop_size) / stride)) + 1 101 | w_grid = int(np.ceil(float(pw - crop_size) / stride)) + 1 102 | data_scale = np.zeros((ph, pw, class_num), dtype=np.float32) 103 | count_scale = np.zeros((ph, pw, class_num), dtype=np.float32) 104 | for grid_yidx in xrange(0, h_grid): 105 | for grid_xidx in xrange(0, w_grid): 106 | s_x = int(grid_xidx * stride) 107 | s_y = int(grid_yidx * stride) 108 | e_x = min(s_x + crop_size, pw) 109 | e_y = min(s_y + crop_size, ph) 110 | s_x = int(e_x - crop_size) 111 | s_y = int(e_y - crop_size) 112 | _sub = _pad[s_y:e_y, s_x:e_x, :] 113 | count_scale[s_y:e_y, s_x:e_x, :] += 1.0 114 | input_data = pad_img(_sub - mean_value) 115 | data_scale[s_y:e_y, s_x:e_x, :] += caffe_process(input_data) 116 | score = data_scale / count_scale 117 | score = score[:sh, :sw, :] 118 | 119 | return score 120 | 121 | 122 | def pad_img(_scale): 123 | sh, sw, sd = _scale.shape 124 | if sh < crop_size: 125 | _pad = np.zeros((crop_size, sw, sd), dtype=np.float32) 126 | _pad[:sh, :, :] = _scale 127 | _scale = _pad 128 | sh, sw, sd = _scale.shape 129 | if sw < crop_size: 130 | _pad = np.zeros((sh, crop_size, sd), dtype=np.float32) 131 | _pad[:, :sw, :] = _scale 132 | _scale = _pad 133 | 134 | return _scale 135 | 136 | 137 | def caffe_process(_input): 138 | h, w, d = _input.shape 139 | _score = np.zeros((h, w, class_num), dtype=np.float32) 140 | if flip: 141 | _flip = _input[:, ::-1] 142 | _flip = _flip.transpose(2, 0, 
1) 143 | _flip = _flip.reshape((1,) + _flip.shape) 144 | net.blobs['data'].reshape(*_flip.shape) 145 | net.blobs['data'].data[...] = _flip / raw_scale 146 | # net.blobs['data_dim'].data[...] = [[[h, w]]] 147 | net.forward() 148 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0)[:, ::-1] 149 | 150 | _input = _input.transpose(2, 0, 1) 151 | _input = _input.reshape((1,) + _input.shape) 152 | net.blobs['data'].reshape(*_input.shape) 153 | net.blobs['data'].data[...] = _input / raw_scale 154 | # net.blobs['data_dim'].data[...] = [[[h, w]]] 155 | net.forward() 156 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0) 157 | 158 | return _score / int(flip + 1) 159 | 160 | if __name__ == '__main__': 161 | eval_batch() 162 | 163 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-5x.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | layer { 9 | name: "conv1_1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1_1" 13 | convolution_param { 14 | num_output: 24 15 | pad: 1 16 | kernel_size: 3 17 | } 18 | } 19 | layer { 20 | name: "relu1_1" 21 | type: "ReLU" 22 | bottom: "conv1_1" 23 | top: "conv1_1" 24 | } 25 | layer { 26 | name: "conv1_2" 27 | type: "Convolution" 28 | bottom: "conv1_1" 29 | top: "conv1_2" 30 | convolution_param { 31 | num_output: 22 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_2" 38 | type: "ReLU" 39 | bottom: "conv1_2" 40 | top: "conv1_2" 41 | } 42 | layer { 43 | name: "pool1" 44 | type: "Pooling" 45 | bottom: "conv1_2" 46 | top: "pool1" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 2 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | name: "conv2_1" 55 | type: "Convolution" 56 | bottom: "pool1" 57 | top: "conv2_1" 58 | convolution_param { 59 | num_output: 41 60 | pad: 1 61 | kernel_size: 3 62 | } 63 | } 64 | layer { 65 | name: "relu2_1" 66 | type: "ReLU" 67 | bottom: "conv2_1" 68 | top: "conv2_1" 69 | } 70 | layer { 71 | name: "conv2_2" 72 | type: "Convolution" 73 | bottom: "conv2_1" 74 | top: "conv2_2" 75 | convolution_param { 76 | num_output: 51 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layer { 82 | name: "relu2_2" 83 | type: "ReLU" 84 | bottom: "conv2_2" 85 | top: "conv2_2" 86 | } 87 | layer { 88 | name: "pool2" 89 | type: "Pooling" 90 | bottom: "conv2_2" 91 | top: "pool2" 92 | pooling_param { 93 | pool: MAX 94 | kernel_size: 2 95 | stride: 2 96 | } 97 | } 98 | layer { 99 | name: "conv3_1" 100 | type: "Convolution" 101 | bottom: "pool2" 102 | top: "conv3_1" 103 | convolution_param { 104 | num_output: 108 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layer { 110 | name: "relu3_1" 111 | type: "ReLU" 112 | bottom: "conv3_1" 113 | top: "conv3_1" 114 | } 115 | layer { 116 | name: "conv3_2" 117 | type: "Convolution" 118 | bottom: "conv3_1" 119 | top: "conv3_2" 120 | convolution_param { 121 | num_output: 89 122 | pad: 1 123 | kernel_size: 3 124 | } 125 | } 126 | layer { 127 | name: "relu3_2" 128 | type: "ReLU" 129 | bottom: "conv3_2" 130 | top: "conv3_2" 131 | } 132 | layer { 133 | name: "conv3_3" 134 | type: "Convolution" 135 | bottom: "conv3_2" 136 | top: "conv3_3" 137 | convolution_param { 138 | num_output: 111 139 | pad: 1 140 | kernel_size: 3 141 | } 142 | } 143 | layer { 144 | name: "relu3_3" 145 | type: "ReLU" 146 | bottom: "conv3_3" 147 | top: "conv3_3" 148 | } 149 | layer { 150 | name: "pool3" 151 | type: 
"Pooling" 152 | bottom: "conv3_3" 153 | top: "pool3" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "conv4_1" 162 | type: "Convolution" 163 | bottom: "pool3" 164 | top: "conv4_1" 165 | convolution_param { 166 | num_output: 184 167 | pad: 1 168 | kernel_size: 3 169 | } 170 | } 171 | layer { 172 | name: "relu4_1" 173 | type: "ReLU" 174 | bottom: "conv4_1" 175 | top: "conv4_1" 176 | } 177 | layer { 178 | name: "conv4_2" 179 | type: "Convolution" 180 | bottom: "conv4_1" 181 | top: "conv4_2" 182 | convolution_param { 183 | num_output: 276 184 | pad: 1 185 | kernel_size: 3 186 | } 187 | } 188 | layer { 189 | name: "relu4_2" 190 | type: "ReLU" 191 | bottom: "conv4_2" 192 | top: "conv4_2" 193 | } 194 | layer { 195 | name: "conv4_3" 196 | type: "Convolution" 197 | bottom: "conv4_2" 198 | top: "conv4_3" 199 | convolution_param { 200 | num_output: 228 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layer { 206 | name: "relu4_3" 207 | type: "ReLU" 208 | bottom: "conv4_3" 209 | top: "conv4_3" 210 | } 211 | layer { 212 | name: "pool4" 213 | type: "Pooling" 214 | bottom: "conv4_3" 215 | top: "pool4" 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layer { 223 | name: "conv5_1" 224 | type: "Convolution" 225 | bottom: "pool4" 226 | top: "conv5_1" 227 | convolution_param { 228 | num_output: 512 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layer { 234 | name: "relu5_1" 235 | type: "ReLU" 236 | bottom: "conv5_1" 237 | top: "conv5_1" 238 | } 239 | layer { 240 | name: "conv5_2" 241 | type: "Convolution" 242 | bottom: "conv5_1" 243 | top: "conv5_2" 244 | convolution_param { 245 | num_output: 512 246 | pad: 1 247 | kernel_size: 3 248 | } 249 | } 250 | layer { 251 | name: "relu5_2" 252 | type: "ReLU" 253 | bottom: "conv5_2" 254 | top: "conv5_2" 255 | } 256 | layer { 257 | name: "conv5_3" 258 | type: "Convolution" 259 | bottom: "conv5_2" 260 | top: "conv5_3" 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layer { 268 | name: "relu5_3" 269 | type: "ReLU" 270 | bottom: "conv5_3" 271 | top: "conv5_3" 272 | } 273 | layer { 274 | name: "pool5" 275 | type: "Pooling" 276 | bottom: "conv5_3" 277 | top: "pool5" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 2 281 | stride: 2 282 | } 283 | } 284 | layer { 285 | name: "fc6" 286 | type: "InnerProduct" 287 | bottom: "pool5" 288 | top: "fc6" 289 | inner_product_param { 290 | num_output: 4096 291 | } 292 | } 293 | layer { 294 | name: "relu6" 295 | type: "ReLU" 296 | bottom: "fc6" 297 | top: "fc6" 298 | } 299 | layer { 300 | name: "drop6" 301 | type: "Dropout" 302 | bottom: "fc6" 303 | top: "fc6" 304 | dropout_param { 305 | dropout_ratio: 0.5 306 | } 307 | } 308 | layer { 309 | name: "fc7" 310 | type: "InnerProduct" 311 | bottom: "fc6" 312 | top: "fc7" 313 | inner_product_param { 314 | num_output: 4096 315 | } 316 | } 317 | layer { 318 | name: "relu7" 319 | type: "ReLU" 320 | bottom: "fc7" 321 | top: "fc7" 322 | } 323 | layer { 324 | name: "drop7" 325 | type: "Dropout" 326 | bottom: "fc7" 327 | top: "fc7" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | layer { 333 | name: "fc8" 334 | type: "InnerProduct" 335 | bottom: "fc7" 336 | top: "fc8" 337 | inner_product_param { 338 | num_output: 1000 339 | } 340 | } 341 | layer { 342 | name: "prob" 343 | type: "Softmax" 344 | bottom: "fc8" 345 | top: "prob" 346 | } 347 | 348 | 
-------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-dsd.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | layer { 9 | bottom: "data" 10 | top: "conv1_1" 11 | name: "conv1_1" 12 | type: "Convolution" 13 | convolution_param { 14 | num_output: 64 15 | pad: 1 16 | kernel_size: 3 17 | } 18 | } 19 | layer { 20 | bottom: "conv1_1" 21 | top: "conv1_1" 22 | name: "relu1_1" 23 | type: "ReLU" 24 | } 25 | layer { 26 | bottom: "conv1_1" 27 | top: "conv1_2" 28 | name: "conv1_2" 29 | type: "Convolution" 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | bottom: "conv1_2" 38 | top: "conv1_2" 39 | name: "relu1_2" 40 | type: "ReLU" 41 | } 42 | layer { 43 | bottom: "conv1_2" 44 | top: "pool1" 45 | name: "pool1" 46 | type: "Pooling" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 2 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | bottom: "pool1" 55 | top: "conv2_1" 56 | name: "conv2_1" 57 | type: "Convolution" 58 | convolution_param { 59 | num_output: 128 60 | pad: 1 61 | kernel_size: 3 62 | } 63 | } 64 | layer { 65 | bottom: "conv2_1" 66 | top: "conv2_1" 67 | name: "relu2_1" 68 | type: "ReLU" 69 | } 70 | layer { 71 | bottom: "conv2_1" 72 | top: "conv2_2" 73 | name: "conv2_2" 74 | type: "Convolution" 75 | convolution_param { 76 | num_output: 128 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layer { 82 | bottom: "conv2_2" 83 | top: "conv2_2" 84 | name: "relu2_2" 85 | type: "ReLU" 86 | } 87 | layer { 88 | bottom: "conv2_2" 89 | top: "pool2" 90 | name: "pool2" 91 | type: "Pooling" 92 | pooling_param { 93 | pool: MAX 94 | kernel_size: 2 95 | stride: 2 96 | } 97 | } 98 | layer { 99 | bottom: "pool2" 100 | top: "conv3_1" 101 | name: "conv3_1" 102 | type: "Convolution" 103 | convolution_param { 104 | num_output: 256 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layer { 110 | bottom: "conv3_1" 111 | top: "conv3_1" 112 | name: "relu3_1" 113 | type: "ReLU" 114 | } 115 | layer { 116 | bottom: "conv3_1" 117 | top: "conv3_2" 118 | name: "conv3_2" 119 | type: "Convolution" 120 | convolution_param { 121 | num_output: 256 122 | pad: 1 123 | kernel_size: 3 124 | } 125 | } 126 | layer { 127 | bottom: "conv3_2" 128 | top: "conv3_2" 129 | name: "relu3_2" 130 | type: "ReLU" 131 | } 132 | layer { 133 | bottom: "conv3_2" 134 | top: "conv3_3" 135 | name: "conv3_3" 136 | type: "Convolution" 137 | convolution_param { 138 | num_output: 256 139 | pad: 1 140 | kernel_size: 3 141 | } 142 | } 143 | layer { 144 | bottom: "conv3_3" 145 | top: "conv3_3" 146 | name: "relu3_3" 147 | type: "ReLU" 148 | } 149 | layer { 150 | bottom: "conv3_3" 151 | top: "pool3" 152 | name: "pool3" 153 | type: "Pooling" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | bottom: "pool3" 162 | top: "conv4_1" 163 | name: "conv4_1" 164 | type: "Convolution" 165 | convolution_param { 166 | num_output: 512 167 | pad: 1 168 | kernel_size: 3 169 | } 170 | } 171 | layer { 172 | bottom: "conv4_1" 173 | top: "conv4_1" 174 | name: "relu4_1" 175 | type: "ReLU" 176 | } 177 | layer { 178 | bottom: "conv4_1" 179 | top: "conv4_2" 180 | name: "conv4_2" 181 | type: "Convolution" 182 | convolution_param { 183 | num_output: 512 184 | pad: 1 185 | kernel_size: 3 186 | } 187 | } 188 | layer { 189 | bottom: "conv4_2" 190 | top: "conv4_2" 191 | name: "relu4_2" 192 | type: 
"ReLU" 193 | } 194 | layer { 195 | bottom: "conv4_2" 196 | top: "conv4_3" 197 | name: "conv4_3" 198 | type: "Convolution" 199 | convolution_param { 200 | num_output: 512 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layer { 206 | bottom: "conv4_3" 207 | top: "conv4_3" 208 | name: "relu4_3" 209 | type: "ReLU" 210 | } 211 | layer { 212 | bottom: "conv4_3" 213 | top: "pool4" 214 | name: "pool4" 215 | type: "Pooling" 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layer { 223 | bottom: "pool4" 224 | top: "conv5_1" 225 | name: "conv5_1" 226 | type: "Convolution" 227 | convolution_param { 228 | num_output: 512 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layer { 234 | bottom: "conv5_1" 235 | top: "conv5_1" 236 | name: "relu5_1" 237 | type: "ReLU" 238 | } 239 | layer { 240 | bottom: "conv5_1" 241 | top: "conv5_2" 242 | name: "conv5_2" 243 | type: "Convolution" 244 | convolution_param { 245 | num_output: 512 246 | pad: 1 247 | kernel_size: 3 248 | } 249 | } 250 | layer { 251 | bottom: "conv5_2" 252 | top: "conv5_2" 253 | name: "relu5_2" 254 | type: "ReLU" 255 | } 256 | layer { 257 | bottom: "conv5_2" 258 | top: "conv5_3" 259 | name: "conv5_3" 260 | type: "Convolution" 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layer { 268 | bottom: "conv5_3" 269 | top: "conv5_3" 270 | name: "relu5_3" 271 | type: "ReLU" 272 | } 273 | layer { 274 | bottom: "conv5_3" 275 | top: "pool5" 276 | name: "pool5" 277 | type: "Pooling" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 2 281 | stride: 2 282 | } 283 | } 284 | layer { 285 | bottom: "pool5" 286 | top: "fc6" 287 | name: "fc6" 288 | type: "InnerProduct" 289 | inner_product_param { 290 | num_output: 4096 291 | } 292 | } 293 | layer { 294 | bottom: "fc6" 295 | top: "fc6" 296 | name: "relu6" 297 | type: "ReLU" 298 | } 299 | layer { 300 | bottom: "fc6" 301 | top: "fc6" 302 | name: "drop6" 303 | type: "Dropout" 304 | dropout_param { 305 | dropout_ratio: 0.5 306 | } 307 | } 308 | layer { 309 | bottom: "fc6" 310 | top: "fc7" 311 | name: "fc7" 312 | type: "InnerProduct" 313 | inner_product_param { 314 | num_output: 4096 315 | } 316 | } 317 | layer { 318 | bottom: "fc7" 319 | top: "fc7" 320 | name: "relu7" 321 | type: "ReLU" 322 | } 323 | layer { 324 | bottom: "fc7" 325 | top: "fc7" 326 | name: "drop7" 327 | type: "Dropout" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | layer { 333 | bottom: "fc7" 334 | top: "fc8" 335 | name: "fc8" 336 | type: "InnerProduct" 337 | inner_product_param { 338 | num_output: 1000 339 | } 340 | } 341 | layer { 342 | name: "prob" 343 | type: "Softmax" 344 | bottom: "fc8" 345 | top: "prob" 346 | } 347 | 348 | -------------------------------------------------------------------------------- /det/rfcn/tools/score.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xml.etree.ElementTree as ET 3 | 4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/' 5 | val_file = '~/2007test.txt' 6 | det_root = '~/predict_ss/' 7 | 8 | _classes = ('__background__', # always index 0 9 | 'aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor') 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in 
tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False): 68 | 69 | eval_images = [] 70 | f = open(val_file, 'r') 71 | for i in f: 72 | eval_images.append(i.strip()) 73 | 74 | recs = {} 75 | for imagename in eval_images: 76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml') 77 | 78 | class_recs = {} 79 | 80 | npos = 0 81 | for imagename in eval_images: 82 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 83 | bbox = np.array([x['bbox'] for x in R]) 84 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 85 | det = [False] * len(R) 86 | npos = npos + sum(~difficult) 87 | class_recs[imagename] = {'bbox': bbox, 88 | 'difficult': difficult, 89 | 'det': det} 90 | detfile = det_root + 'comp4' + '_det' + '_test_' + classname + '.txt' 91 | with open(detfile, 'r') as f: 92 | lines = f.readlines() 93 | splitlines = [x.strip().split(' ') for x in lines] 94 | image_ids = [x[0] for x in splitlines] 95 | confidence = np.array([float(x[1]) for x in splitlines]) 96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 97 | 98 | # sort by confidence 99 | sorted_ind = np.argsort(-confidence) 100 | sorted_scores = np.sort(-confidence) 101 | BB = BB[sorted_ind, :] 102 | image_ids = [image_ids[x] for x in sorted_ind] 103 | 104 | # go down dets and mark TPs and FPs 105 | nd = len(image_ids) 106 | tp = np.zeros(nd) 107 | fp = np.zeros(nd) 108 | for d in range(nd): 109 | R = class_recs[image_ids[d]] 110 | bb = BB[d, :].astype(float) 111 | ovmax = -np.inf 112 | BBGT = R['bbox'].astype(float) 113 | 114 | if BBGT.size > 0: 115 | # compute overlaps 116 | # intersection 117 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 118 | iymin = np.maximum(BBGT[:, 1], bb[1]) 119 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 120 | iymax = np.minimum(BBGT[:, 3], bb[3]) 121 | iw = np.maximum(ixmax - ixmin + 1., 0.) 122 | ih = np.maximum(iymax - iymin + 1., 0.) 123 | inters = iw * ih 124 | 125 | # union 126 | uni = ((bb[2] - bb[0] + 1.) 
* (bb[3] - bb[1] + 1.) + 127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 129 | 130 | overlaps = inters / uni 131 | ovmax = np.max(overlaps) 132 | jmax = np.argmax(overlaps) 133 | 134 | if ovmax > ovthresh: 135 | if not R['difficult'][jmax]: 136 | if not R['det'][jmax]: 137 | tp[d] = 1. 138 | R['det'][jmax] = 1 139 | else: 140 | fp[d] = 1. 141 | else: 142 | fp[d] = 1. 143 | 144 | # compute precision recall 145 | fp = np.cumsum(fp) 146 | tp = np.cumsum(tp) 147 | rec = tp / float(npos) 148 | # avoid divide by zero in case the first detection matches a difficult 149 | # ground truth 150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 151 | ap = voc_ap(rec, prec, use_07_metric) 152 | 153 | return rec, prec, ap 154 | 155 | def compute_ap(): 156 | 157 | aps = [] 158 | for i, cls in enumerate(_classes): 159 | if cls == '__background__': 160 | continue 161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt' 162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True) 163 | 164 | aps += [ap] 165 | print('AP for {} = {:.4f}'.format(cls, ap)) 166 | 167 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 168 | print('~~~~~~~~') 169 | print('Results:') 170 | 171 | for ap in aps: 172 | print('{:.3f}'.format(ap)) 173 | print('{:.3f}'.format(np.mean(aps))) 174 | print('~~~~~~~~') 175 | 176 | if __name__ == '__main__': 177 | compute_ap() 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /det/faster_rcnn/tools/score.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xml.etree.ElementTree as ET 3 | 4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/' 5 | val_file = '~/2007test.txt' 6 | det_root = '~/predict_ss/' 7 | 8 | _classes = ('__background__', # always index 0 9 | 'aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor') 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 
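    # Editorial note: the 11-point branch above samples interpolated precision at
    # recall thresholds {0.0, 0.1, ..., 1.0} and averages the 11 values; the
    # branch below instead integrates the precision envelope exactly. Toy example:
    # for rec = [0.5, 1.0] and prec = [1.0, 0.5], the 11-point AP is
    # (6 * 1.0 + 5 * 0.5) / 11 = 8.5 / 11 ~ 0.77, while the integrated AP is
    # 0.5 * 1.0 + 0.5 * 0.5 = 0.75.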
49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False): 68 | 69 | eval_images = [] 70 | f = open(val_file, 'r') 71 | for i in f: 72 | eval_images.append(i.strip()) 73 | 74 | recs = {} 75 | for imagename in eval_images: 76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml') 77 | 78 | class_recs = {} 79 | 80 | npos = 0 81 | for imagename in eval_images: 82 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 83 | bbox = np.array([x['bbox'] for x in R]) 84 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 85 | det = [False] * len(R) 86 | npos = npos + sum(~difficult) 87 | class_recs[imagename] = {'bbox': bbox, 88 | 'difficult': difficult, 89 | 'det': det} 90 | detfile = det_root + 'comp4' + '_det' + '_test_' + classname + '.txt' 91 | with open(detfile, 'r') as f: 92 | lines = f.readlines() 93 | splitlines = [x.strip().split(' ') for x in lines] 94 | image_ids = [x[0] for x in splitlines] 95 | confidence = np.array([float(x[1]) for x in splitlines]) 96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 97 | 98 | # sort by confidence 99 | sorted_ind = np.argsort(-confidence) 100 | sorted_scores = np.sort(-confidence) 101 | BB = BB[sorted_ind, :] 102 | image_ids = [image_ids[x] for x in sorted_ind] 103 | 104 | # go down dets and mark TPs and FPs 105 | nd = len(image_ids) 106 | tp = np.zeros(nd) 107 | fp = np.zeros(nd) 108 | for d in range(nd): 109 | R = class_recs[image_ids[d]] 110 | bb = BB[d, :].astype(float) 111 | ovmax = -np.inf 112 | BBGT = R['bbox'].astype(float) 113 | 114 | if BBGT.size > 0: 115 | # compute overlaps 116 | # intersection 117 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 118 | iymin = np.maximum(BBGT[:, 1], bb[1]) 119 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 120 | iymax = np.minimum(BBGT[:, 3], bb[3]) 121 | iw = np.maximum(ixmax - ixmin + 1., 0.) 122 | ih = np.maximum(iymax - iymin + 1., 0.) 123 | inters = iw * ih 124 | 125 | # union 126 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 129 | 130 | overlaps = inters / uni 131 | ovmax = np.max(overlaps) 132 | jmax = np.argmax(overlaps) 133 | 134 | if ovmax > ovthresh: 135 | if not R['difficult'][jmax]: 136 | if not R['det'][jmax]: 137 | tp[d] = 1. 138 | R['det'][jmax] = 1 139 | else: 140 | fp[d] = 1. 141 | else: 142 | fp[d] = 1. 
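    # Editorial note: detections were sorted by descending confidence above, and
    # each ground-truth box may be matched at most once (the R['det'][jmax]
    # flag), so a duplicate detection of an already-matched object counts as a
    # false positive; detections matched to a "difficult" ground truth count as
    # neither true nor false positive.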
143 | 144 | # compute precision recall 145 | fp = np.cumsum(fp) 146 | tp = np.cumsum(tp) 147 | rec = tp / float(npos) 148 | # avoid divide by zero in case the first detection matches a difficult 149 | # ground truth 150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 151 | ap = voc_ap(rec, prec, use_07_metric) 152 | 153 | return rec, prec, ap 154 | 155 | def compute_ap(): 156 | 157 | aps = [] 158 | for i, cls in enumerate(_classes): 159 | if cls == '__background__': 160 | continue 161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt' 162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True) 163 | 164 | aps += [ap] 165 | print('AP for {} = {:.4f}'.format(cls, ap)) 166 | 167 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 168 | print('~~~~~~~~') 169 | print('Results:') 170 | 171 | for ap in aps: 172 | print('{:.3f}'.format(ap)) 173 | print('{:.3f}'.format(np.mean(aps))) 174 | print('~~~~~~~~') 175 | 176 | if __name__ == '__main__': 177 | compute_ap() 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /cls/evaluation_cls.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('~/caffe-master-0116/python') 4 | 5 | import numpy as np 6 | import caffe 7 | import cv2 8 | import datetime 9 | 10 | gpu_mode = True 11 | gpu_id = 0 12 | data_root = '~/Database/ILSVRC2012' 13 | val_file = 'ILSVRC2012_val.txt' 14 | save_log = 'log{}.txt'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S')) 15 | model_weights = 'resnet-v2/resnet101_v2.caffemodel' 16 | model_deploy = 'resnet-v2/deploy_resnet101_v2.prototxt' 17 | prob_layer = 'prob' 18 | class_num = 1000 19 | base_size = 256 # short size 20 | crop_size = 224 21 | # mean_value = np.array([128.0, 128.0, 128.0]) # BGR 22 | mean_value = np.array([102.9801, 115.9465, 122.7717]) # BGR 23 | # std = np.array([128.0, 128.0, 128.0]) # BGR 24 | std = np.array([1.0, 1.0, 1.0]) # BGR 25 | crop_num = 1 # 1 and others for center(single)-crop, 12 for mirror(12)-crop, 144 for multi(144)-crop 26 | batch_size = 1 27 | top_k = (1, 5) 28 | 29 | if gpu_mode: 30 | caffe.set_mode_gpu() 31 | caffe.set_device(gpu_id) 32 | else: 33 | caffe.set_mode_cpu() 34 | net = caffe.Net(model_deploy, model_weights, caffe.TEST) 35 | 36 | 37 | def eval_batch(): 38 | eval_images = [] 39 | ground_truth = [] 40 | f = open(val_file, 'r') 41 | for i in f: 42 | eval_images.append(i.strip().split(' ')[0]) 43 | ground_truth.append(int(i.strip().split(' ')[1])) 44 | f.close() 45 | 46 | skip_num = 0 47 | eval_len = len(eval_images) 48 | accuracy = np.zeros(len(top_k)) 49 | # eval_len = 100 50 | start_time = datetime.datetime.now() 51 | for i in xrange(eval_len - skip_num): 52 | _img = cv2.imread(data_root + eval_images[i + skip_num]) 53 | _img = cv2.resize(_img, (int(_img.shape[1] * base_size / min(_img.shape[:2])), 54 | int(_img.shape[0] * base_size / min(_img.shape[:2]))) 55 | ) 56 | _img = image_preprocess(_img) 57 | 58 | score_vec = np.zeros(class_num, dtype=np.float32) 59 | crops = [] 60 | if crop_num == 1: 61 | crops.append(center_crop(_img)) 62 | elif crop_num == 12: 63 | crops.extend(mirror_crop(_img)) 64 | elif crop_num == 144: 65 | crops.extend(multi_crop(_img)) 66 | else: 67 | crops.append(center_crop(_img)) 68 | 69 | iter_num = int(len(crops) / batch_size) 70 | for j in xrange(iter_num): 71 | score_vec += caffe_process(np.asarray(crops, dtype=np.float32)[j*batch_size:(j+1)*batch_size]) 72 | score_index = 
(-score_vec / len(crops)).argsort() 73 | 74 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len - skip_num) + ' ' + str(score_index[0]) + '/' + str( 75 | ground_truth[i + skip_num]), 76 | for j in xrange(len(top_k)): 77 | if ground_truth[i + skip_num] in score_index[:top_k[j]]: 78 | accuracy[j] += 1 79 | tmp_acc = float(accuracy[j]) / float(i + 1) 80 | if top_k[j] == 1: 81 | print '\ttop_' + str(top_k[j]) + ':' + str(tmp_acc), 82 | else: 83 | print 'top_' + str(top_k[j]) + ':' + str(tmp_acc) 84 | 85 | end_time = datetime.datetime.now() 86 | w = open(save_log, 'w') 87 | s1 = 'Evaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time)) 88 | s2 = '\nThe model is: {}. \nThe val file is: {}. \n{} images has been tested, crop_num is: {}, base_size is: {}, ' \ 89 | 'crop_size is: {}.'.format(model_weights, val_file, str(eval_len), str(crop_num), str(base_size), str(crop_size)) 90 | s3 = '\nThe mean value is: ({}, {}, {}).'.format(str(mean_value[0]), str(mean_value[1]), str(mean_value[2])) 91 | s4 = '' 92 | for i in xrange(len(top_k)): 93 | _acc = float(accuracy[i]) / float(eval_len) 94 | s4 += '\nAccuracy of top_{} is: {}; correct num is {}.'.format(str(top_k[i]), str(_acc), str(int(accuracy[i]))) 95 | print s1, s2, s3, s4 96 | w.write(s1 + s2 + s3 + s4) 97 | w.close() 98 | 99 | 100 | def image_preprocess(img): 101 | b, g, r = cv2.split(img) 102 | return cv2.merge([(b-mean_value[0])/std[0], (g-mean_value[1])/std[1], (r-mean_value[2])/std[2]]) 103 | 104 | 105 | def center_crop(img): # single crop 106 | short_edge = min(img.shape[:2]) 107 | if short_edge < crop_size: 108 | return 109 | yy = int((img.shape[0] - crop_size) / 2) 110 | xx = int((img.shape[1] - crop_size) / 2) 111 | return img[yy: yy + crop_size, xx: xx + crop_size] 112 | 113 | 114 | def over_sample(img): # 12 crops of image 115 | short_edge = min(img.shape[:2]) 116 | if short_edge < crop_size: 117 | return 118 | yy = int((img.shape[0] - crop_size) / 2) 119 | xx = int((img.shape[1] - crop_size) / 2) 120 | sample_list = [img[:crop_size, :crop_size], img[-crop_size:, -crop_size:], img[:crop_size, -crop_size:], 121 | img[-crop_size:, :crop_size], img[yy: yy + crop_size, xx: xx + crop_size], 122 | cv2.resize(img, (crop_size, crop_size))] 123 | return sample_list 124 | 125 | 126 | def mirror_crop(img): # 12*len(size_list) crops 127 | crop_list = [] 128 | img_resize = cv2.resize(img, (base_size, base_size)) 129 | mirror = img_resize[:, ::-1] 130 | crop_list.extend(over_sample(img_resize)) 131 | crop_list.extend(over_sample(mirror)) 132 | return crop_list 133 | 134 | 135 | def multi_crop(img): # 144(12*12) crops 136 | crop_list = [] 137 | size_list = [256, 288, 320, 352] # crop_size: 224 138 | # size_list = [270, 300, 330, 360] # crop_size: 235 139 | # size_list = [320, 352, 384, 416] # crop_size: 299 140 | # size_list = [352, 384, 416, 448] # crop_size: 320 141 | short_edge = min(img.shape[:2]) 142 | for i in size_list: 143 | img_resize = cv2.resize(img, (img.shape[1] * i / short_edge, img.shape[0] * i / short_edge)) 144 | yy = int((img_resize.shape[0] - i) / 2) 145 | xx = int((img_resize.shape[1] - i) / 2) 146 | for j in xrange(3): 147 | left_center_right = img_resize[yy * j: yy * j + i, xx * j: xx * j + i] 148 | mirror = left_center_right[:, ::-1] 149 | crop_list.extend(over_sample(left_center_right)) 150 | crop_list.extend(over_sample(mirror)) 151 | return crop_list 152 | 153 | 154 | def caffe_process(_input): 155 | _input = _input.transpose(0, 3, 1, 2) 156 | 
net.blobs['data'].reshape(*_input.shape) 157 | net.blobs['data'].data[...] = _input 158 | net.forward() 159 | 160 | return np.sum(net.blobs[prob_layer].data, axis=0) 161 | 162 | 163 | if __name__ == '__main__': 164 | eval_batch() 165 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/mobilenet/rpn_rcnn_deploys/rcnn_deploy_faster_voc_mobilenet-dw.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv5_5/sep" 2 | input_shape { 3 | dim: 1 4 | dim: 512 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "conv5_5/sep" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | 29 | layer { 30 | name: "conv5_6/dw" 31 | type: "ConvolutionDepthwise" 32 | bottom: "roi_pool" 33 | top: "conv5_6/dw" 34 | param { 35 | lr_mult: 1 36 | decay_mult: 1 37 | } 38 | convolution_param { 39 | num_output: 512 40 | bias_term: false 41 | pad: 1 42 | kernel_size: 3 43 | group: 512 44 | #engine: CAFFE 45 | stride: 2 46 | weight_filler { 47 | type: "msra" 48 | } 49 | } 50 | } 51 | layer { 52 | name: "conv5_6/dw/bn" 53 | type: "BatchNorm" 54 | bottom: "conv5_6/dw" 55 | top: "conv5_6/dw" 56 | param { 57 | lr_mult: 0 58 | decay_mult: 0 59 | } 60 | param { 61 | lr_mult: 0 62 | decay_mult: 0 63 | } 64 | param { 65 | lr_mult: 0 66 | decay_mult: 0 67 | } 68 | } 69 | layer { 70 | name: "conv5_6/dw/scale" 71 | type: "Scale" 72 | bottom: "conv5_6/dw" 73 | top: "conv5_6/dw" 74 | scale_param { 75 | filler { 76 | value: 1 77 | } 78 | bias_term: true 79 | bias_filler { 80 | value: 0 81 | } 82 | } 83 | param { 84 | lr_mult: 0.0 85 | decay_mult: 0.0 86 | } 87 | param { 88 | lr_mult: 0.0 89 | decay_mult: 0.0 90 | } 91 | } 92 | layer { 93 | name: "relu5_6/dw" 94 | type: "ReLU" 95 | bottom: "conv5_6/dw" 96 | top: "conv5_6/dw" 97 | } 98 | layer { 99 | name: "conv5_6/sep" 100 | type: "Convolution" 101 | bottom: "conv5_6/dw" 102 | top: "conv5_6/sep" 103 | param { 104 | lr_mult: 1 105 | decay_mult: 1 106 | } 107 | convolution_param { 108 | num_output: 1024 109 | bias_term: false 110 | pad: 0 111 | kernel_size: 1 112 | stride: 1 113 | weight_filler { 114 | type: "msra" 115 | } 116 | } 117 | } 118 | layer { 119 | name: "conv5_6/sep/bn" 120 | type: "BatchNorm" 121 | bottom: "conv5_6/sep" 122 | top: "conv5_6/sep" 123 | param { 124 | lr_mult: 0 125 | decay_mult: 0 126 | } 127 | param { 128 | lr_mult: 0 129 | decay_mult: 0 130 | } 131 | param { 132 | lr_mult: 0 133 | decay_mult: 0 134 | } 135 | } 136 | layer { 137 | name: "conv5_6/sep/scale" 138 | type: "Scale" 139 | bottom: "conv5_6/sep" 140 | top: "conv5_6/sep" 141 | scale_param { 142 | filler { 143 | value: 1 144 | } 145 | bias_term: true 146 | bias_filler { 147 | value: 0 148 | } 149 | } 150 | param { 151 | lr_mult: 0.0 152 | decay_mult: 0.0 153 | } 154 | param { 155 | lr_mult: 0.0 156 | decay_mult: 0.0 157 | } 158 | } 159 | layer { 160 | name: "relu5_6/sep" 161 | type: "ReLU" 162 | bottom: "conv5_6/sep" 163 | top: "conv5_6/sep" 164 | } 165 | layer { 166 | name: "conv6/dw" 167 | type: "ConvolutionDepthwise" 168 | bottom: "conv5_6/sep" 169 | top: "conv6/dw" 170 | param { 171 | lr_mult: 1 172 | decay_mult: 1 173 | } 174 | convolution_param { 175 | num_output: 1024 176 | bias_term: false 177 | pad: 1 178 | kernel_size: 3 
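    # Editorial note: the group value just below equals num_output, so each of
    # the 1024 input channels is convolved with its own single 3x3 filter -- a
    # depthwise convolution. The 1x1 "sep" convolution that follows mixes the
    # channels, so each dw + sep pair forms one depthwise-separable block with
    # roughly 8-9x fewer multiply-adds than a dense 3x3 convolution at this width.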
179 | group: 1024 180 | #engine: CAFFE 181 | stride: 1 182 | weight_filler { 183 | type: "msra" 184 | } 185 | } 186 | } 187 | layer { 188 | name: "conv6/dw/bn" 189 | type: "BatchNorm" 190 | bottom: "conv6/dw" 191 | top: "conv6/dw" 192 | param { 193 | lr_mult: 0 194 | decay_mult: 0 195 | } 196 | param { 197 | lr_mult: 0 198 | decay_mult: 0 199 | } 200 | param { 201 | lr_mult: 0 202 | decay_mult: 0 203 | } 204 | } 205 | layer { 206 | name: "conv6/dw/scale" 207 | type: "Scale" 208 | bottom: "conv6/dw" 209 | top: "conv6/dw" 210 | scale_param { 211 | filler { 212 | value: 1 213 | } 214 | bias_term: true 215 | bias_filler { 216 | value: 0 217 | } 218 | } 219 | param { 220 | lr_mult: 0.0 221 | decay_mult: 0.0 222 | } 223 | param { 224 | lr_mult: 0.0 225 | decay_mult: 0.0 226 | } 227 | } 228 | layer { 229 | name: "relu6/dw" 230 | type: "ReLU" 231 | bottom: "conv6/dw" 232 | top: "conv6/dw" 233 | } 234 | layer { 235 | name: "conv6/sep" 236 | type: "Convolution" 237 | bottom: "conv6/dw" 238 | top: "conv6/sep" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | convolution_param { 244 | num_output: 1024 245 | bias_term: false 246 | pad: 0 247 | kernel_size: 1 248 | stride: 1 249 | weight_filler { 250 | type: "msra" 251 | } 252 | } 253 | } 254 | layer { 255 | name: "conv6/sep/bn" 256 | type: "BatchNorm" 257 | bottom: "conv6/sep" 258 | top: "conv6/sep" 259 | param { 260 | lr_mult: 0 261 | decay_mult: 0 262 | } 263 | param { 264 | lr_mult: 0 265 | decay_mult: 0 266 | } 267 | param { 268 | lr_mult: 0 269 | decay_mult: 0 270 | } 271 | } 272 | layer { 273 | name: "conv6/sep/scale" 274 | type: "Scale" 275 | bottom: "conv6/sep" 276 | top: "conv6/sep" 277 | scale_param { 278 | filler { 279 | value: 1 280 | } 281 | bias_term: true 282 | bias_filler { 283 | value: 0 284 | } 285 | } 286 | param { 287 | lr_mult: 0.0 288 | decay_mult: 0.0 289 | } 290 | param { 291 | lr_mult: 0.0 292 | decay_mult: 0.0 293 | } 294 | } 295 | layer { 296 | name: "relu6/sep" 297 | type: "ReLU" 298 | bottom: "conv6/sep" 299 | top: "conv6/sep" 300 | } 301 | layer { 302 | name: "pool6" 303 | type: "Pooling" 304 | bottom: "conv6/sep" 305 | top: "pool6" 306 | pooling_param { 307 | pool: AVE 308 | global_pooling: true 309 | } 310 | } 311 | layer { 312 | name: "cls_score" 313 | type: "InnerProduct" 314 | bottom: "pool6" 315 | top: "cls_score" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | inner_product_param { 325 | num_output: 21 326 | weight_filler { 327 | type: "msra" 328 | std: 0.01 329 | } 330 | bias_filler { 331 | type: "constant" 332 | value: 0 333 | } 334 | } 335 | } 336 | layer { 337 | name: "bbox_pred" 338 | type: "InnerProduct" 339 | bottom: "pool6" 340 | top: "bbox_pred" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | inner_product_param { 350 | num_output: 84 351 | weight_filler { 352 | type: "msra" 353 | std: 0.01 354 | } 355 | bias_filler { 356 | type: "constant" 357 | value: 0 358 | } 359 | } 360 | } 361 | layer { 362 | name: "cls_prob" 363 | type: "Softmax" 364 | bottom: "cls_score" 365 | top: "cls_prob" 366 | } 367 | 368 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: 
"Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: "conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "conv3_3" 161 | type: "Convolution" 162 | bottom: "conv3_2" 163 | top: "conv3_3" 164 | convolution_param { 165 | bias_term: true 166 | num_output: 256 167 | pad: 1 168 | kernel_size: 3 169 | stride: 1 170 | } 171 | } 172 | 173 | layer { 174 | name: "relu3_3" 175 | type: "ReLU" 176 | bottom: "conv3_3" 177 | top: "conv3_3" 178 | } 179 | 180 | layer { 181 | name: "pool3" 182 | type: "Pooling" 183 | bottom: "conv3_3" 184 | top: "pool3" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 2 188 | stride: 2 189 | } 190 | } 191 | 192 | layer { 193 | name: "conv4_1" 194 | type: "Convolution" 195 | bottom: "pool3" 196 | top: "conv4_1" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_1" 208 | type: "ReLU" 209 | bottom: "conv4_1" 210 | top: "conv4_1" 211 | } 212 | 213 | layer { 214 | name: "conv4_2" 215 | type: "Convolution" 216 | bottom: "conv4_1" 217 | top: "conv4_2" 218 | convolution_param { 219 | bias_term: true 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 
3 223 | stride: 1 224 | } 225 | } 226 | 227 | layer { 228 | name: "relu4_2" 229 | type: "ReLU" 230 | bottom: "conv4_2" 231 | top: "conv4_2" 232 | } 233 | 234 | layer { 235 | name: "conv4_3" 236 | type: "Convolution" 237 | bottom: "conv4_2" 238 | top: "conv4_3" 239 | convolution_param { 240 | bias_term: true 241 | num_output: 512 242 | pad: 1 243 | kernel_size: 3 244 | stride: 1 245 | } 246 | } 247 | 248 | layer { 249 | name: "relu4_3" 250 | type: "ReLU" 251 | bottom: "conv4_3" 252 | top: "conv4_3" 253 | } 254 | 255 | layer { 256 | name: "pool4" 257 | type: "Pooling" 258 | bottom: "conv4_3" 259 | top: "pool4" 260 | pooling_param { 261 | pool: MAX 262 | kernel_size: 2 263 | stride: 2 264 | } 265 | } 266 | 267 | layer { 268 | name: "conv5_1" 269 | type: "Convolution" 270 | bottom: "pool4" 271 | top: "conv5_1" 272 | convolution_param { 273 | bias_term: true 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | stride: 1 278 | } 279 | } 280 | 281 | layer { 282 | name: "relu5_1" 283 | type: "ReLU" 284 | bottom: "conv5_1" 285 | top: "conv5_1" 286 | } 287 | 288 | layer { 289 | name: "conv5_2" 290 | type: "Convolution" 291 | bottom: "conv5_1" 292 | top: "conv5_2" 293 | convolution_param { 294 | bias_term: true 295 | num_output: 512 296 | pad: 1 297 | kernel_size: 3 298 | stride: 1 299 | } 300 | } 301 | 302 | layer { 303 | name: "relu5_2" 304 | type: "ReLU" 305 | bottom: "conv5_2" 306 | top: "conv5_2" 307 | } 308 | 309 | layer { 310 | name: "conv5_3" 311 | type: "Convolution" 312 | bottom: "conv5_2" 313 | top: "conv5_3" 314 | convolution_param { 315 | bias_term: true 316 | num_output: 512 317 | pad: 1 318 | kernel_size: 3 319 | stride: 1 320 | } 321 | } 322 | 323 | layer { 324 | name: "relu5_3" 325 | type: "ReLU" 326 | bottom: "conv5_3" 327 | top: "conv5_3" 328 | } 329 | 330 | layer { 331 | name: "pool5" 332 | type: "Pooling" 333 | bottom: "conv5_3" 334 | top: "pool5" 335 | pooling_param { 336 | pool: MAX 337 | kernel_size: 2 338 | stride: 2 339 | } 340 | } 341 | 342 | layer { 343 | bottom: "pool5" 344 | top: "fc6" 345 | name: "fc6" 346 | type: "InnerProduct" 347 | inner_product_param { 348 | num_output: 4096 349 | } 350 | } 351 | 352 | layer { 353 | name: "relu6" 354 | type: "ReLU" 355 | bottom: "fc6" 356 | top: "fc6" 357 | } 358 | 359 | layer { 360 | name: "dropout6" 361 | type: "Dropout" 362 | bottom: "fc6" 363 | top: "fc6" 364 | dropout_param { 365 | dropout_ratio: 0.5 366 | } 367 | } 368 | 369 | layer { 370 | bottom: "fc6" 371 | top: "fc7" 372 | name: "fc7" 373 | type: "InnerProduct" 374 | inner_product_param { 375 | num_output: 4096 376 | } 377 | } 378 | 379 | layer { 380 | name: "relu7" 381 | type: "ReLU" 382 | bottom: "fc7" 383 | top: "fc7" 384 | } 385 | 386 | layer { 387 | name: "dropout7" 388 | type: "Dropout" 389 | bottom: "fc7" 390 | top: "fc7" 391 | dropout_param { 392 | dropout_ratio: 0.5 393 | } 394 | } 395 | 396 | layer { 397 | bottom: "fc7" 398 | top: "classifier" 399 | name: "classifier" 400 | type: "InnerProduct" 401 | inner_product_param { 402 | num_output: 1000 403 | } 404 | } 405 | 406 | layer { 407 | name: "prob" 408 | type: "Softmax" 409 | bottom: "classifier" 410 | top: "prob" 411 | } 412 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-tf.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: 
"conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: "conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "conv3_3" 161 | type: "Convolution" 162 | bottom: "conv3_2" 163 | top: "conv3_3" 164 | convolution_param { 165 | bias_term: true 166 | num_output: 256 167 | pad: 1 168 | kernel_size: 3 169 | stride: 1 170 | } 171 | } 172 | 173 | layer { 174 | name: "relu3_3" 175 | type: "ReLU" 176 | bottom: "conv3_3" 177 | top: "conv3_3" 178 | } 179 | 180 | layer { 181 | name: "pool3" 182 | type: "Pooling" 183 | bottom: "conv3_3" 184 | top: "pool3" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 2 188 | stride: 2 189 | } 190 | } 191 | 192 | layer { 193 | name: "conv4_1" 194 | type: "Convolution" 195 | bottom: "pool3" 196 | top: "conv4_1" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_1" 208 | type: "ReLU" 209 | bottom: "conv4_1" 210 | top: "conv4_1" 211 | } 212 | 213 | layer { 214 | name: "conv4_2" 215 | type: "Convolution" 216 | bottom: "conv4_1" 217 | top: "conv4_2" 218 | convolution_param { 219 | bias_term: true 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 3 223 | stride: 1 224 | } 225 | } 226 | 227 
| layer { 228 | name: "relu4_2" 229 | type: "ReLU" 230 | bottom: "conv4_2" 231 | top: "conv4_2" 232 | } 233 | 234 | layer { 235 | name: "conv4_3" 236 | type: "Convolution" 237 | bottom: "conv4_2" 238 | top: "conv4_3" 239 | convolution_param { 240 | bias_term: true 241 | num_output: 512 242 | pad: 1 243 | kernel_size: 3 244 | stride: 1 245 | } 246 | } 247 | 248 | layer { 249 | name: "relu4_3" 250 | type: "ReLU" 251 | bottom: "conv4_3" 252 | top: "conv4_3" 253 | } 254 | 255 | layer { 256 | name: "pool4" 257 | type: "Pooling" 258 | bottom: "conv4_3" 259 | top: "pool4" 260 | pooling_param { 261 | pool: MAX 262 | kernel_size: 2 263 | stride: 2 264 | } 265 | } 266 | 267 | layer { 268 | name: "conv5_1" 269 | type: "Convolution" 270 | bottom: "pool4" 271 | top: "conv5_1" 272 | convolution_param { 273 | bias_term: true 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | stride: 1 278 | } 279 | } 280 | 281 | layer { 282 | name: "relu5_1" 283 | type: "ReLU" 284 | bottom: "conv5_1" 285 | top: "conv5_1" 286 | } 287 | 288 | layer { 289 | name: "conv5_2" 290 | type: "Convolution" 291 | bottom: "conv5_1" 292 | top: "conv5_2" 293 | convolution_param { 294 | bias_term: true 295 | num_output: 512 296 | pad: 1 297 | kernel_size: 3 298 | stride: 1 299 | } 300 | } 301 | 302 | layer { 303 | name: "relu5_2" 304 | type: "ReLU" 305 | bottom: "conv5_2" 306 | top: "conv5_2" 307 | } 308 | 309 | layer { 310 | name: "conv5_3" 311 | type: "Convolution" 312 | bottom: "conv5_2" 313 | top: "conv5_3" 314 | convolution_param { 315 | bias_term: true 316 | num_output: 512 317 | pad: 1 318 | kernel_size: 3 319 | stride: 1 320 | } 321 | } 322 | 323 | layer { 324 | name: "relu5_3" 325 | type: "ReLU" 326 | bottom: "conv5_3" 327 | top: "conv5_3" 328 | } 329 | 330 | layer { 331 | name: "pool5" 332 | type: "Pooling" 333 | bottom: "conv5_3" 334 | top: "pool5" 335 | pooling_param { 336 | pool: MAX 337 | kernel_size: 2 338 | stride: 2 339 | } 340 | } 341 | 342 | layer { 343 | name: "fc6" 344 | type: "Convolution" 345 | bottom: "pool5" 346 | top: "fc6" 347 | convolution_param { 348 | bias_term: true 349 | num_output: 4096 350 | kernel_size: 7 351 | stride: 1 352 | } 353 | } 354 | 355 | layer { 356 | name: "relu6" 357 | type: "ReLU" 358 | bottom: "fc6" 359 | top: "fc6" 360 | } 361 | 362 | layer { 363 | name: "dropout6" 364 | type: "Dropout" 365 | bottom: "fc6" 366 | top: "fc6" 367 | dropout_param { 368 | dropout_ratio: 0.5 369 | } 370 | } 371 | 372 | layer { 373 | name: "fc7" 374 | type: "Convolution" 375 | bottom: "fc6" 376 | top: "fc7" 377 | convolution_param { 378 | bias_term: true 379 | num_output: 4096 380 | kernel_size: 1 381 | stride: 1 382 | } 383 | } 384 | 385 | layer { 386 | name: "relu7" 387 | type: "ReLU" 388 | bottom: "fc7" 389 | top: "fc7" 390 | } 391 | 392 | layer { 393 | name: "dropout7" 394 | type: "Dropout" 395 | bottom: "fc7" 396 | top: "fc7" 397 | dropout_param { 398 | dropout_ratio: 0.5 399 | } 400 | } 401 | 402 | layer { 403 | name: "fc8" 404 | type: "Convolution" 405 | bottom: "fc7" 406 | top: "fc8" 407 | convolution_param { 408 | bias_term: true 409 | num_output: 1000 410 | kernel_size: 1 411 | stride: 1 412 | } 413 | } 414 | 415 | layer { 416 | name: "reshape" 417 | type: "Reshape" 418 | bottom: "fc8" 419 | top: "reshape" 420 | reshape_param { 421 | shape { 422 | dim: 0 423 | dim: 0 424 | } 425 | } 426 | } 427 | 428 | layer { 429 | name: "prob" 430 | type: "Softmax" 431 | bottom: "reshape" 432 | top: "prob" 433 | } -------------------------------------------------------------------------------- 
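Editorial note on the -tf variant above: its classifier head is fully convolutional (fc6 is a 7x7 convolution over the 512x7x7 pool5 output; fc7 and fc8 are 1x1 convolutions), so the net also accepts inputs larger than 224x224; the trailing Reshape merely squeezes the 1x1 spatial dimensions before the Softmax. Converting pretrained InnerProduct weights into such kernels is a lossless reshape; a minimal numpy sketch (shapes taken from the prototxt above, variable and function names hypothetical):

import numpy as np

def fc_to_conv(fc_weight, in_channels, kh, kw):
    # An InnerProduct weight of shape (out, in_channels*kh*kw) holds the same
    # values as a convolution kernel of shape (out, in_channels, kh, kw).
    out_dim, in_dim = fc_weight.shape
    assert in_dim == in_channels * kh * kw
    return fc_weight.reshape(out_dim, in_channels, kh, kw)

fc6_w = np.zeros((4096, 512 * 7 * 7), dtype=np.float32)  # stand-in for net.params['fc6'][0].data
conv6_w = fc_to_conv(fc6_w, 512, 7, 7)   # -> (4096, 512, 7, 7)
fc7_w = np.zeros((4096, 4096), dtype=np.float32)
conv7_w = fc_to_conv(fc7_w, 4096, 1, 1)  # -> (4096, 4096, 1, 1)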
/det/faster_rcnn/models/pascal_voc/xception/rpn_rcnn_deploys/rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "xception11_elewise" 2 | input_shape { 3 | dim: 1 4 | dim: 728 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "xception11_elewise" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 8 24 | pooled_h: 8 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "xception12_match_conv" 30 | type: "Convolution" 31 | bottom: "roi_pool" 32 | top: "xception12_match_conv" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | bias_term: false 39 | num_output: 1024 40 | pad: 0 41 | kernel_size: 1 42 | stride: 1 43 | } 44 | } 45 | layer { 46 | name: "xception12_match_conv_scale" 47 | type: "Scale" 48 | bottom: "xception12_match_conv" 49 | top: "xception12_match_conv" 50 | scale_param { 51 | bias_term: true 52 | } 53 | param { 54 | lr_mult: 0.0 55 | decay_mult: 0.0 56 | } 57 | param { 58 | lr_mult: 0.0 59 | decay_mult: 0.0 60 | } 61 | } 62 | layer { 63 | name: "xception12_relu" 64 | type: "ReLU" 65 | bottom: "roi_pool" 66 | top: "xception12_relu" 67 | } 68 | layer { 69 | name: "xception12_conv1_1" 70 | type: "ConvolutionDepthwise" 71 | bottom: "xception12_relu" 72 | top: "xception12_conv1_1" 73 | param { 74 | lr_mult: 1 75 | decay_mult: 1 76 | } 77 | convolution_param { 78 | bias_term: false 79 | num_output: 728 80 | group: 728 81 | pad: 1 82 | kernel_size: 3 83 | stride: 1 84 | } 85 | } 86 | layer { 87 | name: "xception12_conv1_2" 88 | type: "Convolution" 89 | bottom: "xception12_conv1_1" 90 | top: "xception12_conv1_2" 91 | param { 92 | lr_mult: 1 93 | decay_mult: 1 94 | } 95 | convolution_param { 96 | bias_term: false 97 | num_output: 728 98 | pad: 0 99 | kernel_size: 1 100 | stride: 1 101 | } 102 | } 103 | layer { 104 | name: "xception12_conv1_scale" 105 | type: "Scale" 106 | bottom: "xception12_conv1_2" 107 | top: "xception12_conv1_2" 108 | scale_param { 109 | bias_term: true 110 | } 111 | param { 112 | lr_mult: 0.0 113 | decay_mult: 0.0 114 | } 115 | param { 116 | lr_mult: 0.0 117 | decay_mult: 0.0 118 | } 119 | } 120 | layer { 121 | name: "xception12_conv1_relu" 122 | type: "ReLU" 123 | bottom: "xception12_conv1_2" 124 | top: "xception12_conv1_2" 125 | } 126 | layer { 127 | name: "xception12_conv2_1" 128 | type: "ConvolutionDepthwise" 129 | bottom: "xception12_conv1_2" 130 | top: "xception12_conv2_1" 131 | param { 132 | lr_mult: 1 133 | decay_mult: 1 134 | } 135 | convolution_param { 136 | bias_term: false 137 | num_output: 728 138 | group: 728 139 | pad: 1 140 | kernel_size: 3 141 | stride: 1 142 | } 143 | } 144 | layer { 145 | name: "xception12_conv2_2" 146 | type: "Convolution" 147 | bottom: "xception12_conv2_1" 148 | top: "xception12_conv2_2" 149 | param { 150 | lr_mult: 1 151 | decay_mult: 1 152 | } 153 | convolution_param { 154 | bias_term: false 155 | num_output: 1024 156 | pad: 0 157 | kernel_size: 1 158 | stride: 1 159 | } 160 | } 161 | layer { 162 | name: "xception12_conv2_scale" 163 | type: "Scale" 164 | bottom: "xception12_conv2_2" 165 | top: "xception12_conv2_2" 166 | scale_param { 167 | bias_term: true 168 | } 169 | param { 170 | lr_mult: 0.0 171 | decay_mult: 0.0 172 | } 173 | param { 174 | lr_mult: 0.0 175 | decay_mult: 0.0 176 | } 
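    # Editorial note: the "-merge" in these deploy names appears to mean that the
    # training net's BatchNorm layers were folded offline into Scale layers
    # (scale = gamma / sqrt(var + eps), bias = beta - gamma * mean / sqrt(var + eps)),
    # which is why every Scale here carries bias_term: true and frozen
    # lr_mult: 0 parameters.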
177 | } 178 | layer { 179 | name: "xception12_pool" 180 | type: "Pooling" 181 | bottom: "xception12_conv2_2" 182 | top: "xception12_pool" 183 | pooling_param { 184 | pool: MAX 185 | kernel_size: 3 186 | stride: 1 187 | pad: 1 188 | ceil_mode: false 189 | } 190 | } 191 | layer { 192 | name: "xception12_elewise" 193 | type: "Eltwise" 194 | bottom: "xception12_match_conv" 195 | bottom: "xception12_pool" 196 | top: "xception12_elewise" 197 | eltwise_param { 198 | operation: SUM 199 | } 200 | } 201 | layer { 202 | name: "conv3_1" 203 | type: "ConvolutionDepthwise" 204 | bottom: "xception12_elewise" 205 | top: "conv3_1" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | convolution_param { 211 | bias_term: false 212 | num_output: 1024 213 | group: 1024 214 | pad: 1 215 | kernel_size: 3 216 | stride: 1 217 | } 218 | } 219 | layer { 220 | name: "conv3_2" 221 | type: "Convolution" 222 | bottom: "conv3_1" 223 | top: "conv3_2" 224 | param { 225 | lr_mult: 1 226 | decay_mult: 1 227 | } 228 | convolution_param { 229 | bias_term: false 230 | num_output: 1536 231 | pad: 0 232 | kernel_size: 1 233 | stride: 1 234 | } 235 | } 236 | layer { 237 | name: "conv3_scale" 238 | type: "Scale" 239 | bottom: "conv3_2" 240 | top: "conv3_2" 241 | scale_param { 242 | bias_term: true 243 | } 244 | param { 245 | lr_mult: 0.0 246 | decay_mult: 0.0 247 | } 248 | param { 249 | lr_mult: 0.0 250 | decay_mult: 0.0 251 | } 252 | } 253 | layer { 254 | name: "conv3_relu" 255 | type: "ReLU" 256 | bottom: "conv3_2" 257 | top: "conv3_2" 258 | } 259 | layer { 260 | name: "conv4_1" 261 | type: "ConvolutionDepthwise" 262 | bottom: "conv3_2" 263 | top: "conv4_1" 264 | param { 265 | lr_mult: 1 266 | decay_mult: 1 267 | } 268 | convolution_param { 269 | bias_term: false 270 | num_output: 1536 271 | group: 1536 272 | pad: 1 273 | kernel_size: 3 274 | stride: 1 275 | } 276 | } 277 | layer { 278 | name: "conv4_2" 279 | type: "Convolution" 280 | bottom: "conv4_1" 281 | top: "conv4_2" 282 | param { 283 | lr_mult: 1 284 | decay_mult: 1 285 | } 286 | convolution_param { 287 | bias_term: false 288 | num_output: 2048 289 | pad: 0 290 | kernel_size: 1 291 | stride: 1 292 | } 293 | } 294 | layer { 295 | name: "conv4_scale" 296 | type: "Scale" 297 | bottom: "conv4_2" 298 | top: "conv4_2" 299 | scale_param { 300 | bias_term: true 301 | } 302 | param { 303 | lr_mult: 0.0 304 | decay_mult: 0.0 305 | } 306 | param { 307 | lr_mult: 0.0 308 | decay_mult: 0.0 309 | } 310 | } 311 | layer { 312 | name: "conv4_relu" 313 | type: "ReLU" 314 | bottom: "conv4_2" 315 | top: "conv4_2" 316 | } 317 | layer { 318 | name: "pool_ave" 319 | type: "Pooling" 320 | bottom: "conv4_2" 321 | top: "pool_ave" 322 | pooling_param { 323 | global_pooling : true 324 | pool: AVE 325 | } 326 | } 327 | layer { 328 | name: "cls_score" 329 | type: "InnerProduct" 330 | bottom: "pool_ave" 331 | top: "cls_score" 332 | param { 333 | lr_mult: 1 334 | decay_mult: 1 335 | } 336 | param { 337 | lr_mult: 2 338 | decay_mult: 0 339 | } 340 | inner_product_param { 341 | num_output: 21 342 | weight_filler { 343 | type: "msra" 344 | std: 0.01 345 | } 346 | bias_filler { 347 | type: "constant" 348 | value: 0 349 | } 350 | } 351 | } 352 | layer { 353 | name: "bbox_pred" 354 | type: "InnerProduct" 355 | bottom: "pool_ave" 356 | top: "bbox_pred" 357 | param { 358 | lr_mult: 1 359 | decay_mult: 1 360 | } 361 | param { 362 | lr_mult: 2 363 | decay_mult: 0 364 | } 365 | inner_product_param { 366 | num_output: 84 367 | weight_filler { 368 | type: "msra" 369 | std: 0.01 370 | } 371 | bias_filler { 
372 | type: "constant" 373 | value: 0 374 | } 375 | } 376 | } 377 | layer { 378 | name: "cls_prob" 379 | type: "Softmax" 380 | bottom: "cls_score" 381 | top: "cls_prob" 382 | } 383 | 384 | 385 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg19-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | layer { 23 | name: "relu1_1" 24 | type: "ReLU" 25 | bottom: "conv1_1" 26 | top: "conv1_1" 27 | } 28 | 29 | layer { 30 | name: "conv1_2" 31 | type: "Convolution" 32 | bottom: "conv1_1" 33 | top: "conv1_2" 34 | convolution_param { 35 | bias_term: true 36 | num_output: 64 37 | pad: 1 38 | kernel_size: 3 39 | stride: 1 40 | } 41 | } 42 | layer { 43 | name: "relu1_2" 44 | type: "ReLU" 45 | bottom: "conv1_2" 46 | top: "conv1_2" 47 | } 48 | 49 | layer { 50 | name: "pool1" 51 | type: "Pooling" 52 | bottom: "conv1_2" 53 | top: "pool1" 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 2 57 | stride: 2 58 | } 59 | } 60 | 61 | layer { 62 | name: "conv2_1" 63 | type: "Convolution" 64 | bottom: "pool1" 65 | top: "conv2_1" 66 | convolution_param { 67 | bias_term: true 68 | num_output: 128 69 | pad: 1 70 | kernel_size: 3 71 | stride: 1 72 | } 73 | } 74 | layer { 75 | name: "relu2_1" 76 | type: "ReLU" 77 | bottom: "conv2_1" 78 | top: "conv2_1" 79 | } 80 | 81 | layer { 82 | name: "conv2_2" 83 | type: "Convolution" 84 | bottom: "conv2_1" 85 | top: "conv2_2" 86 | convolution_param { 87 | bias_term: true 88 | num_output: 128 89 | pad: 1 90 | kernel_size: 3 91 | stride: 1 92 | } 93 | } 94 | layer { 95 | name: "relu2_2" 96 | type: "ReLU" 97 | bottom: "conv2_2" 98 | top: "conv2_2" 99 | } 100 | 101 | layer { 102 | name: "pool2" 103 | type: "Pooling" 104 | bottom: "conv2_2" 105 | top: "pool2" 106 | pooling_param { 107 | pool: MAX 108 | kernel_size: 2 109 | stride: 2 110 | } 111 | } 112 | 113 | layer { 114 | name: "conv3_1" 115 | type: "Convolution" 116 | bottom: "pool2" 117 | top: "conv3_1" 118 | convolution_param { 119 | bias_term: true 120 | num_output: 256 121 | pad: 1 122 | kernel_size: 3 123 | stride: 1 124 | } 125 | } 126 | layer { 127 | name: "relu3_1" 128 | type: "ReLU" 129 | bottom: "conv3_1" 130 | top: "conv3_1" 131 | } 132 | 133 | layer { 134 | name: "conv3_2" 135 | type: "Convolution" 136 | bottom: "conv3_1" 137 | top: "conv3_2" 138 | convolution_param { 139 | bias_term: true 140 | num_output: 256 141 | pad: 1 142 | kernel_size: 3 143 | stride: 1 144 | } 145 | } 146 | layer { 147 | name: "relu3_2" 148 | type: "ReLU" 149 | bottom: "conv3_2" 150 | top: "conv3_2" 151 | } 152 | 153 | layer { 154 | name: "conv3_3" 155 | type: "Convolution" 156 | bottom: "conv3_2" 157 | top: "conv3_3" 158 | convolution_param { 159 | bias_term: true 160 | num_output: 256 161 | pad: 1 162 | kernel_size: 3 163 | stride: 1 164 | } 165 | } 166 | layer { 167 | name: "relu3_3" 168 | type: "ReLU" 169 | bottom: "conv3_3" 170 | top: "conv3_3" 171 | } 172 | layer { 173 | name: "conv3_4" 174 | type: "Convolution" 175 | bottom: "conv3_3" 176 | top: "conv3_4" 177 | convolution_param { 178 | bias_term: true 179 | num_output: 256 180 | pad: 1 181 | kernel_size: 3 182 | stride: 1 183 | } 184 | } 185 | layer { 186 | name: "relu3_4" 
187 | type: "ReLU" 188 | bottom: "conv3_4" 189 | top: "conv3_4" 190 | } 191 | 192 | layer { 193 | name: "pool3" 194 | type: "Pooling" 195 | bottom: "conv3_4" 196 | top: "pool3" 197 | pooling_param { 198 | pool: MAX 199 | kernel_size: 2 200 | stride: 2 201 | } 202 | } 203 | 204 | layer { 205 | name: "conv4_1" 206 | type: "Convolution" 207 | bottom: "pool3" 208 | top: "conv4_1" 209 | convolution_param { 210 | bias_term: true 211 | num_output: 512 212 | pad: 1 213 | kernel_size: 3 214 | stride: 1 215 | } 216 | } 217 | layer { 218 | name: "relu4_1" 219 | type: "ReLU" 220 | bottom: "conv4_1" 221 | top: "conv4_1" 222 | } 223 | 224 | layer { 225 | name: "conv4_2" 226 | type: "Convolution" 227 | bottom: "conv4_1" 228 | top: "conv4_2" 229 | convolution_param { 230 | bias_term: true 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | stride: 1 235 | } 236 | } 237 | layer { 238 | name: "relu4_2" 239 | type: "ReLU" 240 | bottom: "conv4_2" 241 | top: "conv4_2" 242 | } 243 | 244 | layer { 245 | name: "conv4_3" 246 | type: "Convolution" 247 | bottom: "conv4_2" 248 | top: "conv4_3" 249 | convolution_param { 250 | bias_term: true 251 | num_output: 512 252 | pad: 1 253 | kernel_size: 3 254 | stride: 1 255 | } 256 | } 257 | layer { 258 | name: "relu4_3" 259 | type: "ReLU" 260 | bottom: "conv4_3" 261 | top: "conv4_3" 262 | } 263 | 264 | layer { 265 | name: "conv4_4" 266 | type: "Convolution" 267 | bottom: "conv4_3" 268 | top: "conv4_4" 269 | convolution_param { 270 | bias_term: true 271 | num_output: 512 272 | pad: 1 273 | kernel_size: 3 274 | stride: 1 275 | } 276 | } 277 | layer { 278 | name: "relu4_4" 279 | type: "ReLU" 280 | bottom: "conv4_4" 281 | top: "conv4_4" 282 | } 283 | 284 | layer { 285 | name: "pool4" 286 | type: "Pooling" 287 | bottom: "conv4_4" 288 | top: "pool4" 289 | pooling_param { 290 | pool: MAX 291 | kernel_size: 2 292 | stride: 2 293 | } 294 | } 295 | 296 | layer { 297 | name: "conv5_1" 298 | type: "Convolution" 299 | bottom: "pool4" 300 | top: "conv5_1" 301 | convolution_param { 302 | bias_term: true 303 | num_output: 512 304 | pad: 1 305 | kernel_size: 3 306 | stride: 1 307 | } 308 | } 309 | layer { 310 | name: "relu5_1" 311 | type: "ReLU" 312 | bottom: "conv5_1" 313 | top: "conv5_1" 314 | } 315 | 316 | layer { 317 | name: "conv5_2" 318 | type: "Convolution" 319 | bottom: "conv5_1" 320 | top: "conv5_2" 321 | convolution_param { 322 | bias_term: true 323 | num_output: 512 324 | pad: 1 325 | kernel_size: 3 326 | stride: 1 327 | } 328 | } 329 | layer { 330 | name: "relu5_2" 331 | type: "ReLU" 332 | bottom: "conv5_2" 333 | top: "conv5_2" 334 | } 335 | 336 | layer { 337 | name: "conv5_3" 338 | type: "Convolution" 339 | bottom: "conv5_2" 340 | top: "conv5_3" 341 | convolution_param { 342 | bias_term: true 343 | num_output: 512 344 | pad: 1 345 | kernel_size: 3 346 | stride: 1 347 | } 348 | } 349 | layer { 350 | name: "relu5_3" 351 | type: "ReLU" 352 | bottom: "conv5_3" 353 | top: "conv5_3" 354 | } 355 | 356 | layer { 357 | name: "conv5_4" 358 | type: "Convolution" 359 | bottom: "conv5_3" 360 | top: "conv5_4" 361 | convolution_param { 362 | bias_term: true 363 | num_output: 512 364 | pad: 1 365 | kernel_size: 3 366 | stride: 1 367 | } 368 | } 369 | layer { 370 | name: "relu5_4" 371 | type: "ReLU" 372 | bottom: "conv5_4" 373 | top: "conv5_4" 374 | } 375 | 376 | layer { 377 | name: "pool5" 378 | type: "Pooling" 379 | bottom: "conv5_4" 380 | top: "pool5" 381 | pooling_param { 382 | pool: MAX 383 | kernel_size: 2 384 | stride: 2 385 | } 386 | } 387 | 388 | layer { 389 | bottom: "pool5" 
390 | top: "fc6" 391 | name: "fc6" 392 | type: "InnerProduct" 393 | inner_product_param { 394 | num_output: 4096 395 | } 396 | } 397 | 398 | layer { 399 | name: "relu6" 400 | type: "ReLU" 401 | bottom: "fc6" 402 | top: "fc6" 403 | } 404 | 405 | layer { 406 | name: "dropout6" 407 | type: "Dropout" 408 | bottom: "fc6" 409 | top: "fc6" 410 | dropout_param { 411 | dropout_ratio: 0.5 412 | } 413 | } 414 | 415 | layer { 416 | bottom: "fc6" 417 | top: "fc7" 418 | name: "fc7" 419 | type: "InnerProduct" 420 | inner_product_param { 421 | num_output: 4096 422 | } 423 | } 424 | 425 | layer { 426 | name: "relu7" 427 | type: "ReLU" 428 | bottom: "fc7" 429 | top: "fc7" 430 | } 431 | 432 | layer { 433 | name: "dropout7" 434 | type: "Dropout" 435 | bottom: "fc7" 436 | top: "fc7" 437 | dropout_param { 438 | dropout_ratio: 0.5 439 | } 440 | } 441 | 442 | layer { 443 | bottom: "fc7" 444 | top: "classifier" 445 | name: "classifier" 446 | type: "InnerProduct" 447 | inner_product_param { 448 | num_output: 1000 449 | } 450 | } 451 | 452 | layer { 453 | name: "prob" 454 | type: "Softmax" 455 | bottom: "classifier" 456 | top: "prob" 457 | } 458 | -------------------------------------------------------------------------------- /seg/pspnet/tools/image_seg_data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | 4 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python') 5 | 6 | import caffe 7 | 8 | import numpy as np 9 | 10 | 11 | class ImageSegDataLayer(caffe.Layer): 12 | def get_gpu_id(self, gpu_id=0): 13 | self.gpu_id = gpu_id 14 | if self.shuffle: 15 | np.random.seed(self.gpu_id) 16 | np.random.shuffle(self.indices) 17 | 18 | def setup(self, bottom, top): 19 | print self.param_str 20 | params = eval(self.param_str) 21 | 22 | self.color_factor = np.array(params.get('color_factor', (0.95, 1.05))) # (0.95, 1.05) 23 | self.contrast_factor = np.array(params.get('contrast_factor', (0.95, 1.05))) # (0.95, 1.05) 24 | self.brightness_factor = np.array(params.get('brightness_factor', (0.95, 1.05))) # (0.95, 1.05) 25 | self.mirror = params.get('mirror', True) 26 | self.gaussian_blur = params.get('gaussian_blur', True) 27 | self.scale_factor = np.array(params.get('scale_factor', (0.75, 2.0))) # (0.75, 2.0) 28 | self.rotation_factor = np.array(params.get('rotation_factor', (-10, 10))) # (-10, 10) 29 | 30 | self.crop_size = int(params.get('crop_size', 513)) 31 | self.ignore_label = int(params.get('ignore_label', 255)) 32 | self.mean = np.array(params.get('mean', (102.98, 115.947, 122.772)), dtype=np.float32) 33 | self.scale = float(params.get('scale', 1.0)) 34 | 35 | self.root_dir = params['root_dir'] 36 | self.source = params['source'] 37 | self.batch_size = int(params.get('batch_size', 1)) 38 | self.shuffle = params.get('shuffle', True) 39 | 40 | if len(top) != 2: 41 | raise Exception("Need to define two tops: data and label.") 42 | if len(bottom) != 0: 43 | raise Exception("Do not define a bottom.") # data layers have no bottoms 44 | if len(self.color_factor) != 2: 45 | raise Exception("'color_factor' must have 2 values for factor range.") 46 | if len(self.contrast_factor) != 2: 47 | raise Exception("'contrast_factor' must have 2 values for factor range.") 48 | if len(self.brightness_factor) != 2: 49 | raise Exception("'brightness_factor' must have 2 values for factor range.") 50 | if len(self.mean) != 3: 51 | raise Exception("'mean' must have 3 values for B G R.") 52 | if len(self.scale_factor) != 2: 53 | raise Exception("'scale_factor' must 
have 2 values for factor range.") 54 | if self.crop_size <= 0: 55 | raise Exception("'Need positive crop_size.") 56 | 57 | self.indices = open(self.source, 'r').read().splitlines() 58 | self.epoch_num = len(self.indices) 59 | self.idx = 0 60 | 61 | def reshape(self, bottom, top): 62 | top[0].reshape(self.batch_size, 3, self.crop_size, self.crop_size) # for images 63 | top[1].reshape(self.batch_size, 1, self.crop_size, self.crop_size) # for labels 64 | 65 | def forward(self, bottom, top): 66 | batch_img = [] 67 | batch_label = [] 68 | for _ in xrange(self.batch_size): 69 | _img = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[0])) 70 | _label = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[1]), 0) 71 | 72 | if _img.shape[:2] != _label.shape: 73 | raise Exception("Need to define two tops: data and label.") 74 | 75 | aug_img, aug_label = self.augmentation(_img, _label) 76 | batch_img.append(aug_img.transpose((2, 0, 1))) 77 | batch_label.append([aug_label]) 78 | 79 | self.idx += 1 80 | if self.idx == self.epoch_num: 81 | self.idx = 0 82 | if self.shuffle: 83 | np.random.seed(self.gpu_id) 84 | np.random.shuffle(self.indices) 85 | batch_img = np.asarray(batch_img) 86 | batch_label = np.asarray(batch_label) 87 | 88 | top[0].data[...] = batch_img 89 | top[1].data[...] = batch_label 90 | 91 | def backward(self, top, propagate_down, bottom): 92 | pass 93 | 94 | def augmentation(self, img, label): 95 | ori_h, ori_w = img.shape[:2] 96 | 97 | _color = 1.0 98 | _contrast = 1.0 99 | _brightness = 1.0 100 | 101 | if self.color_factor[0] != 0 and self.color_factor[1] != 0 and self.color_factor[0] < self.color_factor[1]: 102 | _color = np.random.randint(int(self.color_factor[0] * 100), 103 | int(self.color_factor[1] * 100)) / 100.0 104 | 105 | if self.contrast_factor[0] != 0 and self.contrast_factor[1] != 0 and self.contrast_factor[0] < \ 106 | self.contrast_factor[1]: 107 | _contrast = np.random.randint(int(self.contrast_factor[0] * 100), 108 | int(self.contrast_factor[1] * 100)) / 100.0 109 | 110 | if self.brightness_factor[0] != 0 and self.brightness_factor[1] != 0 and self.brightness_factor[0] < \ 111 | self.brightness_factor[1]: 112 | _brightness = np.random.randint(int(self.brightness_factor[0] * 100), 113 | int(self.brightness_factor[1] * 100)) / 100.0 114 | 115 | _HSV = np.dot(cv2.cvtColor(img, cv2.COLOR_BGR2HSV).reshape((-1, 3)), 116 | np.array([[_color, 0, 0], [0, _contrast, 0], [0, 0, _brightness]])) 117 | _HSV_H = np.where(_HSV < 255, _HSV, 255) 118 | img = cv2.cvtColor(np.uint8(_HSV_H.reshape((-1, img.shape[1], 3))), cv2.COLOR_HSV2BGR) 119 | 120 | if self.gaussian_blur: 121 | if not np.random.randint(0, 4): 122 | img = cv2.GaussianBlur(img, (3, 3), 0) 123 | 124 | img = np.asarray(img, dtype=np.float32) 125 | label = np.asarray(label, dtype=np.uint8) 126 | 127 | if self.mirror: 128 | if np.random.randint(0, 2): 129 | img = img[:, :: -1] 130 | label = label[:, :: -1] 131 | 132 | if self.scale_factor[0] != 0 and self.scale_factor[1] != 0 and self.scale_factor[0] < self.scale_factor[1]: 133 | _scale = np.random.randint(int(self.scale_factor[0] * 100), 134 | int(self.scale_factor[1] * 100)) / 100.0 135 | res_w = int(_scale * ori_w) 136 | res_h = int(_scale * ori_h) 137 | img = cv2.resize(img, (res_w, res_h)) 138 | label = cv2.resize(label, (res_w, res_h), interpolation=cv2.cv.CV_INTER_NN) 139 | 140 | if self.rotation_factor[0] != 0 and self.rotation_factor[1] != 0 and self.rotation_factor[0] < \ 141 | self.rotation_factor[1]: 142 | if 
-------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet38a/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet38a-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res15_eletwise" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res15_eletwise" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | bottom: "roi_pool" 30 | top: "pool5" 31 | name: "pool5" 32 | type: "Pooling" 33 | pooling_param { 34 | kernel_size: 3 35 | pad: 0 36 | stride: 2 37 | pool: MAX 38 | } 39 | } 40 | layer { 41 | bottom: "pool5" 42 | top: "res16_scale" 43 | name: "res16_scale" 44 | type: "Scale" 45 | scale_param { 46 | bias_term: true 47 | } 48 | param { 49 | lr_mult: 0.0 50 | decay_mult: 0.0 51 | } 52 | param { 53 | lr_mult: 0.0 54 | decay_mult: 0.0 55 | } 56 | } 57 | layer { 58 | bottom: "res16_scale" 59 | top: "res16_scale" 60 | name: "res16_relu" 61 | type: "ReLU" 62 | } 63 | layer { 64 | bottom: "res16_scale" 65 | top: "res16_match_conv" 66 | name: "res16_match_conv" 67 | type: "Convolution" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | convolution_param { 73 | num_output: 2048 74 | kernel_size: 1 75 | pad: 0 76 | stride: 1 77 | bias_term: false 78 | } 79 | } 80 | layer { 81 | bottom: "res16_scale" 82 | top: "res16_conv1" 83 | name: "res16_conv1" 84 | type: "Convolution" 85 | param { 86 | lr_mult: 1 87 | decay_mult: 1 88 | } 89 | convolution_param { 90 | num_output: 512 91 | kernel_size: 1 92 | pad: 0 93 | stride: 1 94 | bias_term: false 95 | } 96 | } 97 | layer { 98 | bottom: "res16_conv1" 99 | top: "res16_conv1" 100 | name: "res16_conv1_scale" 101 | type: "Scale" 102 | scale_param { 103 | bias_term: true 104 | } 105 | param { 106 | lr_mult: 0.0 107 |
decay_mult: 0.0 108 | } 109 | param { 110 | lr_mult: 0.0 111 | decay_mult: 0.0 112 | } 113 | } 114 | layer { 115 | bottom: "res16_conv1" 116 | top: "res16_conv1" 117 | name: "res16_conv1_relu" 118 | type: "ReLU" 119 | } 120 | layer { 121 | bottom: "res16_conv1" 122 | top: "res16_conv2" 123 | name: "res16_conv2" 124 | type: "Convolution" 125 | param { 126 | lr_mult: 1 127 | decay_mult: 1 128 | } 129 | convolution_param { 130 | num_output: 1024 131 | kernel_size: 3 132 | pad: 1 133 | stride: 1 134 | bias_term: false 135 | } 136 | } 137 | layer { 138 | bottom: "res16_conv2" 139 | top: "res16_conv2" 140 | name: "res16_conv2_scale" 141 | type: "Scale" 142 | scale_param { 143 | bias_term: true 144 | } 145 | param { 146 | lr_mult: 0.0 147 | decay_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | decay_mult: 0.0 152 | } 153 | } 154 | layer { 155 | bottom: "res16_conv2" 156 | top: "res16_conv2" 157 | name: "res16_conv2_relu" 158 | type: "ReLU" 159 | } 160 | layer { 161 | bottom: "res16_conv2" 162 | top: "res16_conv3" 163 | name: "res16_conv3" 164 | type: "Convolution" 165 | param { 166 | lr_mult: 1 167 | decay_mult: 1 168 | } 169 | convolution_param { 170 | num_output: 2048 171 | kernel_size: 1 172 | pad: 0 173 | stride: 1 174 | bias_term: false 175 | } 176 | } 177 | layer { 178 | bottom: "res16_match_conv" 179 | bottom: "res16_conv3" 180 | top: "res16_eletwise" 181 | name: "res16_eletwise" 182 | type: "Eltwise" 183 | } 184 | layer { 185 | bottom: "res16_eletwise" 186 | top: "res17_scale" 187 | name: "res17_scale" 188 | type: "Scale" 189 | scale_param { 190 | bias_term: true 191 | } 192 | param { 193 | lr_mult: 0.0 194 | decay_mult: 0.0 195 | } 196 | param { 197 | lr_mult: 0.0 198 | decay_mult: 0.0 199 | } 200 | } 201 | layer { 202 | bottom: "res17_scale" 203 | top: "res17_scale" 204 | name: "res17_relu" 205 | type: "ReLU" 206 | } 207 | layer { 208 | bottom: "res17_scale" 209 | top: "res17_match_conv" 210 | name: "res17_match_conv" 211 | type: "Convolution" 212 | param { 213 | lr_mult: 1 214 | decay_mult: 1 215 | } 216 | convolution_param { 217 | num_output: 4096 218 | kernel_size: 1 219 | pad: 0 220 | stride: 1 221 | bias_term: false 222 | } 223 | } 224 | layer { 225 | bottom: "res17_scale" 226 | top: "res17_conv1" 227 | name: "res17_conv1" 228 | type: "Convolution" 229 | param { 230 | lr_mult: 1 231 | decay_mult: 1 232 | } 233 | convolution_param { 234 | num_output: 1024 235 | kernel_size: 1 236 | pad: 0 237 | stride: 1 238 | bias_term: false 239 | } 240 | } 241 | layer { 242 | bottom: "res17_conv1" 243 | top: "res17_conv1" 244 | name: "res17_conv1_scale" 245 | type: "Scale" 246 | scale_param { 247 | bias_term: true 248 | } 249 | param { 250 | lr_mult: 0.0 251 | decay_mult: 0.0 252 | } 253 | param { 254 | lr_mult: 0.0 255 | decay_mult: 0.0 256 | } 257 | } 258 | layer { 259 | bottom: "res17_conv1" 260 | top: "res17_conv1" 261 | name: "res17_conv1_relu" 262 | type: "ReLU" 263 | } 264 | layer { 265 | bottom: "res17_conv1" 266 | top: "res17_conv2" 267 | name: "res17_conv2" 268 | type: "Convolution" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | convolution_param { 274 | num_output: 2048 275 | kernel_size: 3 276 | pad: 1 277 | stride: 1 278 | bias_term: false 279 | } 280 | } 281 | layer { 282 | bottom: "res17_conv2" 283 | top: "res17_conv2" 284 | name: "res17_conv2_scale" 285 | type: "Scale" 286 | scale_param { 287 | bias_term: true 288 | } 289 | param { 290 | lr_mult: 0.0 291 | decay_mult: 0.0 292 | } 293 | param { 294 | lr_mult: 0.0 295 | decay_mult: 0.0 296 | } 297 | } 298 
| layer { 299 | bottom: "res17_conv2" 300 | top: "res17_conv2" 301 | name: "res17_conv2_relu" 302 | type: "ReLU" 303 | } 304 | layer { 305 | bottom: "res17_conv2" 306 | top: "res17_conv3" 307 | name: "res17_conv3" 308 | type: "Convolution" 309 | param { 310 | lr_mult: 1 311 | decay_mult: 1 312 | } 313 | convolution_param { 314 | num_output: 4096 315 | kernel_size: 1 316 | pad: 0 317 | stride: 1 318 | bias_term: false 319 | } 320 | } 321 | layer { 322 | bottom: "res17_match_conv" 323 | bottom: "res17_conv3" 324 | top: "res17_eletwise" 325 | name: "res17_eletwise" 326 | type: "Eltwise" 327 | } 328 | layer { 329 | bottom: "res17_eletwise" 330 | top: "res17_eletwise" 331 | name: "res17_eletwise_scale" 332 | type: "Scale" 333 | scale_param { 334 | bias_term: true 335 | } 336 | param { 337 | lr_mult: 0.0 338 | decay_mult: 0.0 339 | } 340 | param { 341 | lr_mult: 0.0 342 | decay_mult: 0.0 343 | } 344 | } 345 | layer { 346 | bottom: "res17_eletwise" 347 | top: "res17_eletwise" 348 | name: "res17_eletwise_relu" 349 | type: "ReLU" 350 | } 351 | layer { 352 | bottom: "res17_eletwise" 353 | top: "pool_ave" 354 | name: "pool_ave" 355 | type: "Pooling" 356 | pooling_param { 357 | global_pooling : true 358 | pool: AVE 359 | } 360 | } 361 | layer { 362 | name: "cls_score" 363 | type: "InnerProduct" 364 | bottom: "pool_ave" 365 | top: "cls_score" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | inner_product_param { 375 | num_output: 21 376 | weight_filler { 377 | type: "msra" 378 | std: 0.01 379 | } 380 | bias_filler { 381 | type: "constant" 382 | value: 0 383 | } 384 | } 385 | } 386 | layer { 387 | name: "bbox_pred" 388 | type: "InnerProduct" 389 | bottom: "pool_ave" 390 | top: "bbox_pred" 391 | param { 392 | lr_mult: 1 393 | decay_mult: 1 394 | } 395 | param { 396 | lr_mult: 2 397 | decay_mult: 0 398 | } 399 | inner_product_param { 400 | num_output: 84 401 | weight_filler { 402 | type: "msra" 403 | std: 0.01 404 | } 405 | bias_filler { 406 | type: "constant" 407 | value: 0 408 | } 409 | } 410 | } 411 | layer { 412 | name: "cls_prob" 413 | type: "Softmax" 414 | bottom: "cls_score" 415 | top: "cls_prob" 416 | } 417 | 418 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg13bn-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | layer { 23 | bottom: "conv1_1" 24 | top: "conv1_1" 25 | name: "conv1_1_bn" 26 | type: "BatchNorm" 27 | batch_norm_param { 28 | use_global_stats: true 29 | } 30 | } 31 | layer { 32 | bottom: "conv1_1" 33 | top: "conv1_1" 34 | name: "conv1_1_scale" 35 | type: "Scale" 36 | scale_param { 37 | bias_term: true 38 | } 39 | } 40 | layer { 41 | name: "relu1_1" 42 | type: "ReLU" 43 | bottom: "conv1_1" 44 | top: "conv1_1" 45 | } 46 | 47 | layer { 48 | name: "conv1_2" 49 | type: "Convolution" 50 | bottom: "conv1_1" 51 | top: "conv1_2" 52 | convolution_param { 53 | bias_term: true 54 | num_output: 64 55 | pad: 1 56 | kernel_size: 3 57 | stride: 1 58 | } 59 | } 60 | layer { 61 | bottom: "conv1_2" 62 | top: "conv1_2" 63 | name: "conv1_2_bn" 64 | type: "BatchNorm" 65 | batch_norm_param { 66 | use_global_stats: 
true 67 | } 68 | } 69 | layer { 70 | bottom: "conv1_2" 71 | top: "conv1_2" 72 | name: "conv1_2_scale" 73 | type: "Scale" 74 | scale_param { 75 | bias_term: true 76 | } 77 | } 78 | layer { 79 | name: "relu1_2" 80 | type: "ReLU" 81 | bottom: "conv1_2" 82 | top: "conv1_2" 83 | } 84 | 85 | layer { 86 | name: "pool1" 87 | type: "Pooling" 88 | bottom: "conv1_2" 89 | top: "pool1" 90 | pooling_param { 91 | pool: MAX 92 | kernel_size: 2 93 | stride: 2 94 | } 95 | } 96 | 97 | layer { 98 | name: "conv2_1" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2_1" 102 | convolution_param { 103 | bias_term: true 104 | num_output: 128 105 | pad: 1 106 | kernel_size: 3 107 | stride: 1 108 | } 109 | } 110 | layer { 111 | bottom: "conv2_1" 112 | top: "conv2_1" 113 | name: "conv2_1_bn" 114 | type: "BatchNorm" 115 | batch_norm_param { 116 | use_global_stats: true 117 | } 118 | } 119 | layer { 120 | bottom: "conv2_1" 121 | top: "conv2_1" 122 | name: "conv2_1_scale" 123 | type: "Scale" 124 | scale_param { 125 | bias_term: true 126 | } 127 | } 128 | layer { 129 | name: "relu2_1" 130 | type: "ReLU" 131 | bottom: "conv2_1" 132 | top: "conv2_1" 133 | } 134 | 135 | layer { 136 | name: "conv2_2" 137 | type: "Convolution" 138 | bottom: "conv2_1" 139 | top: "conv2_2" 140 | convolution_param { 141 | bias_term: true 142 | num_output: 128 143 | pad: 1 144 | kernel_size: 3 145 | stride: 1 146 | } 147 | } 148 | layer { 149 | bottom: "conv2_2" 150 | top: "conv2_2" 151 | name: "conv2_2_bn" 152 | type: "BatchNorm" 153 | batch_norm_param { 154 | use_global_stats: true 155 | } 156 | } 157 | layer { 158 | bottom: "conv2_2" 159 | top: "conv2_2" 160 | name: "conv2_2_scale" 161 | type: "Scale" 162 | scale_param { 163 | bias_term: true 164 | } 165 | } 166 | layer { 167 | name: "relu2_2" 168 | type: "ReLU" 169 | bottom: "conv2_2" 170 | top: "conv2_2" 171 | } 172 | 173 | layer { 174 | name: "pool2" 175 | type: "Pooling" 176 | bottom: "conv2_2" 177 | top: "pool2" 178 | pooling_param { 179 | pool: MAX 180 | kernel_size: 2 181 | stride: 2 182 | } 183 | } 184 | 185 | layer { 186 | name: "conv3_1" 187 | type: "Convolution" 188 | bottom: "pool2" 189 | top: "conv3_1" 190 | convolution_param { 191 | bias_term: true 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | stride: 1 196 | } 197 | } 198 | layer { 199 | bottom: "conv3_1" 200 | top: "conv3_1" 201 | name: "conv3_1_bn" 202 | type: "BatchNorm" 203 | batch_norm_param { 204 | use_global_stats: true 205 | } 206 | } 207 | layer { 208 | bottom: "conv3_1" 209 | top: "conv3_1" 210 | name: "conv3_1_scale" 211 | type: "Scale" 212 | scale_param { 213 | bias_term: true 214 | } 215 | } 216 | layer { 217 | name: "relu3_1" 218 | type: "ReLU" 219 | bottom: "conv3_1" 220 | top: "conv3_1" 221 | } 222 | 223 | layer { 224 | name: "conv3_2" 225 | type: "Convolution" 226 | bottom: "conv3_1" 227 | top: "conv3_2" 228 | convolution_param { 229 | bias_term: true 230 | num_output: 256 231 | pad: 1 232 | kernel_size: 3 233 | stride: 1 234 | } 235 | } 236 | layer { 237 | bottom: "conv3_2" 238 | top: "conv3_2" 239 | name: "conv3_2_bn" 240 | type: "BatchNorm" 241 | batch_norm_param { 242 | use_global_stats: true 243 | } 244 | } 245 | layer { 246 | bottom: "conv3_2" 247 | top: "conv3_2" 248 | name: "conv3_2_scale" 249 | type: "Scale" 250 | scale_param { 251 | bias_term: true 252 | } 253 | } 254 | layer { 255 | name: "relu3_2" 256 | type: "ReLU" 257 | bottom: "conv3_2" 258 | top: "conv3_2" 259 | } 260 | 261 | layer { 262 | name: "pool3" 263 | type: "Pooling" 264 | bottom: "conv3_2" 265 | top: "pool3" 
266 | pooling_param { 267 | pool: MAX 268 | kernel_size: 2 269 | stride: 2 270 | } 271 | } 272 | 273 | layer { 274 | name: "conv4_1" 275 | type: "Convolution" 276 | bottom: "pool3" 277 | top: "conv4_1" 278 | convolution_param { 279 | bias_term: true 280 | num_output: 512 281 | pad: 1 282 | kernel_size: 3 283 | stride: 1 284 | } 285 | } 286 | layer { 287 | bottom: "conv4_1" 288 | top: "conv4_1" 289 | name: "conv4_1_bn" 290 | type: "BatchNorm" 291 | batch_norm_param { 292 | use_global_stats: true 293 | } 294 | } 295 | layer { 296 | bottom: "conv4_1" 297 | top: "conv4_1" 298 | name: "conv4_1_scale" 299 | type: "Scale" 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "relu4_1" 306 | type: "ReLU" 307 | bottom: "conv4_1" 308 | top: "conv4_1" 309 | } 310 | 311 | layer { 312 | name: "conv4_2" 313 | type: "Convolution" 314 | bottom: "conv4_1" 315 | top: "conv4_2" 316 | convolution_param { 317 | bias_term: true 318 | num_output: 512 319 | pad: 1 320 | kernel_size: 3 321 | stride: 1 322 | } 323 | } 324 | layer { 325 | bottom: "conv4_2" 326 | top: "conv4_2" 327 | name: "conv4_2_bn" 328 | type: "BatchNorm" 329 | batch_norm_param { 330 | use_global_stats: true 331 | } 332 | } 333 | layer { 334 | bottom: "conv4_2" 335 | top: "conv4_2" 336 | name: "conv4_2_scale" 337 | type: "Scale" 338 | scale_param { 339 | bias_term: true 340 | } 341 | } 342 | layer { 343 | name: "relu4_2" 344 | type: "ReLU" 345 | bottom: "conv4_2" 346 | top: "conv4_2" 347 | } 348 | 349 | layer { 350 | name: "pool4" 351 | type: "Pooling" 352 | bottom: "conv4_2" 353 | top: "pool4" 354 | pooling_param { 355 | pool: MAX 356 | kernel_size: 2 357 | stride: 2 358 | } 359 | } 360 | 361 | layer { 362 | name: "conv5_1" 363 | type: "Convolution" 364 | bottom: "pool4" 365 | top: "conv5_1" 366 | convolution_param { 367 | bias_term: true 368 | num_output: 512 369 | pad: 1 370 | kernel_size: 3 371 | stride: 1 372 | } 373 | } 374 | layer { 375 | bottom: "conv5_1" 376 | top: "conv5_1" 377 | name: "conv5_1_bn" 378 | type: "BatchNorm" 379 | batch_norm_param { 380 | use_global_stats: true 381 | } 382 | } 383 | layer { 384 | bottom: "conv5_1" 385 | top: "conv5_1" 386 | name: "conv5_1_scale" 387 | type: "Scale" 388 | scale_param { 389 | bias_term: true 390 | } 391 | } 392 | layer { 393 | name: "relu5_1" 394 | type: "ReLU" 395 | bottom: "conv5_1" 396 | top: "conv5_1" 397 | } 398 | 399 | layer { 400 | name: "conv5_2" 401 | type: "Convolution" 402 | bottom: "conv5_1" 403 | top: "conv5_2" 404 | convolution_param { 405 | bias_term: true 406 | num_output: 512 407 | pad: 1 408 | kernel_size: 3 409 | stride: 1 410 | } 411 | } 412 | layer { 413 | bottom: "conv5_2" 414 | top: "conv5_2" 415 | name: "conv5_2_bn" 416 | type: "BatchNorm" 417 | batch_norm_param { 418 | use_global_stats: true 419 | } 420 | } 421 | layer { 422 | bottom: "conv5_2" 423 | top: "conv5_2" 424 | name: "conv5_2_scale" 425 | type: "Scale" 426 | scale_param { 427 | bias_term: true 428 | } 429 | } 430 | layer { 431 | name: "relu5_2" 432 | type: "ReLU" 433 | bottom: "conv5_2" 434 | top: "conv5_2" 435 | } 436 | 437 | layer { 438 | name: "pool5" 439 | type: "Pooling" 440 | bottom: "conv5_2" 441 | top: "pool5" 442 | pooling_param { 443 | pool: MAX 444 | kernel_size: 2 445 | stride: 2 446 | } 447 | } 448 | 449 | layer { 450 | bottom: "pool5" 451 | top: "fc6" 452 | name: "fc6" 453 | type: "InnerProduct" 454 | inner_product_param { 455 | num_output: 4096 456 | } 457 | } 458 | 459 | layer { 460 | name: "relu6" 461 | type: "ReLU" 462 | bottom: "fc6" 463 | top: "fc6" 464 | } 
465 | 466 | layer { 467 | name: "dropout6" 468 | type: "Dropout" 469 | bottom: "fc6" 470 | top: "fc6" 471 | dropout_param { 472 | dropout_ratio: 0.5 473 | } 474 | } 475 | 476 | layer { 477 | bottom: "fc6" 478 | top: "fc7" 479 | name: "fc7" 480 | type: "InnerProduct" 481 | inner_product_param { 482 | num_output: 4096 483 | } 484 | } 485 | 486 | layer { 487 | name: "relu7" 488 | type: "ReLU" 489 | bottom: "fc7" 490 | top: "fc7" 491 | } 492 | 493 | layer { 494 | name: "dropout7" 495 | type: "Dropout" 496 | bottom: "fc7" 497 | top: "fc7" 498 | dropout_param { 499 | dropout_ratio: 0.5 500 | } 501 | } 502 | 503 | layer { 504 | bottom: "fc7" 505 | top: "classifier" 506 | name: "classifier" 507 | type: "InnerProduct" 508 | inner_product_param { 509 | num_output: 1000 510 | } 511 | } 512 | 513 | layer { 514 | name: "prob" 515 | type: "Softmax" 516 | bottom: "classifier" 517 | top: "prob" 518 | } 519 | -------------------------------------------------------------------------------- /cls/README.md: -------------------------------------------------------------------------------- 1 | ## CLS (Classification) 2 | 3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 4 | 5 | ### Disclaimer 6 | 7 | Most of the models are converted from other projects; the main contribution belongs to the original authors. 8 | 9 | Project links: 10 | 11 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [SENet](https://github.com/hujie-frank/SENet) 12 | 13 | 14 | ### Performance on ImageNet validation 15 | **1. Top-1/5 error of pre-trained models in this repository.** 16 | 17 | Network|224/299<br/>(single-crop)|224/299<br/>(12-crop)|320/395<br/>(single-crop)|320/395<br/>(12-crop)
18 | :---:|:---:|:---:|:---:|:---: 19 | resnet18-priv| 29.62/10.38 | 26.69/8.64 | 27.54/8.98 | 26.23/8.21 20 | resnext26-32x4d-priv| 24.93/7.75 | 23.54/6.89 | 24.20/7.21 | 23.19/6.60 21 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57 22 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32 23 | resnet269-v2| 19.71/5.00 | 18.25/4.20 | 18.70/4.33 | 17.87/3.85 24 | resnet38a| 20.66/5.27 | ../.. | 19.25/4.66 | ../.. 25 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24 26 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39 27 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 | 18.12/3.92 28 | inception-resnet-v2| 19.86/4.83 | 18.46/4.08 | 18.75/4.02 | 18.15/3.71 29 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03 30 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59 31 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31 32 | wrn50-2<br/>(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87 33 | airx50-24x4d| 22.39/6.23 | 20.36/5.19 | 20.88/5.33 | 19.97/4.92 34 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43 35 | air152| 20.38/5.11 | 18.46/4.26 | 19.08/4.40 | 18.53/4.00 36 | airx101-32x4d| 21.15/5.74 | 19.43/4.86 | 19.61/4.93 | 18.90/4.49 37 | dpn-68-extra| 22.56/6.24 | 20.48/4.99 | 20.99/5.25 | 20.09/4.73 38 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24 39 | dpn-98| 20.27/5.28 | 18.57/4.42 | 18.87/4.43 | 18.21/4.11 40 | dpn-131| 20.00/5.24 | 18.52/4.28 | 18.63/4.31 | 17.99/3.92 41 | dpn-107-extra| 19.70/5.06 | ../.. | 18.41/4.25 | ../.. 42 | se-inception-v2<br/>(se-inception-bn)| 23.64/7.04 | 21.57/5.86 | 21.61/5.87 | 20.85/5.38 43 | se-resnet50| 22.39/6.37 | 20.61/5.34 | 20.49/5.22 | 20.02/4.85 44 | se-resnet50-hik| 21.98/5.80 | 20.06/4.88 | 20.51/5.04 | 19.92/4.68 45 | se-resnet101| 21.76/5.72 | 19.96/4.79 | 19.97/4.78 | 19.34/4.41 46 | se-resnet152| 21.34/5.54 | 19.56/4.66 | 19.34/4.59 | 18.83/4.32 47 | se-resnext50-32x4d| 20.96/5.53 | 19.39/4.69 | 19.36/4.66 | 18.70/4.38 48 | se-resnext101-32x4d| 19.83/4.95 | 18.44/4.16 | 18.14/4.08 | 17.68/3.86 49 | senet<br/>(se-resnext152-64x4d)| 18.67/4.47 | 17.40/3.69 | 17.28/3.78 | 16.80/3.47 50 | 51 | - The resnet18-priv and resnext26-32x4d-priv models were trained under [pytorch](https://github.com/soeaver/pytorch-classification) by bupt-priv. 52 | - The pre-trained models are tested on the original [caffe](https://github.com/BVLC/caffe) with [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py), **but ceil_mode: false (pooling_layer) is used for the models converted from torch; see the details in https://github.com/BVLC/caffe/pull/3057/files and the sketch after this list**. If you remove ceil_mode: false, top-1 accuracy declines by about 1%. 53 | - 224x224 (base_size=256) and 320x320 (base_size=320) crop sizes are used for resnet-v2/resnext/wrn, and 299x299 (base_size=320) and 395x395 (base_size=395) crop sizes for inception.
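To make the ceil_mode note concrete, here is a minimal sketch of ours (the helper name `pool_output_size` is illustrative, not from this repository) showing how the rounding mode changes a pooling layer's output size:

```python
import math

def pool_output_size(in_size, kernel, stride, pad=0, ceil_mode=True):
    """Spatial output size of a Caffe pooling layer.

    Caffe rounds up (ceil) by default; the ceil_mode: false option from
    https://github.com/BVLC/caffe/pull/3057/files rounds down instead,
    matching the floor behavior of torch pooling.
    """
    rounding = math.ceil if ceil_mode else math.floor
    return int(rounding((in_size + 2.0 * pad - kernel) / stride)) + 1

# A 3x3/stride-2 max pooling on a 112x112 feature map:
assert pool_output_size(112, 3, 2, ceil_mode=True) == 56   # Caffe default
assert pool_output_size(112, 3, 2, ceil_mode=False) == 55  # torch-style floor
```

This one-pixel difference propagates through the later stages, which is why evaluating torch-converted models without ceil_mode: false costs roughly 1% top-1.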
54 | 55 | **2. Top-1/5 accuracy with different crop sizes.** 56 | ![teaser](https://github.com/soeaver/caffe-model/blob/master/cls/accuracy.png) 57 | - Figure: Accuracy curves of inception_v3 (left) and resnet101_v2 (right) with different crop sizes. 58 | 59 | **3. Download URL and forward/backward time cost for each model.** 60 | 61 | Forward/backward time cost is evaluated with one image per mini-batch using cuDNN 5.1 on a Pascal Titan X GPU. 62 | 63 | We use 64 | ``` 65 | ~/caffe/build/tools/caffe time -model deploy.prototxt -gpu 0 -iterations 1000 66 | ``` 67 | to measure the forward/backward time cost; note that this differs considerably from the time cost reported by [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py). 68 | 69 | Network|F/B(224/299)|F/B(320/395)|Download<br/>(BaiduCloud)|Download<br/>(GoogleDrive)|Source
70 | :---:|:---:|:---:|:---:|:---:|:---: 71 | resnet18-priv | 4.48/5.07ms | 4.99/7.01ms | [44.6MB](https://pan.baidu.com/s/1hrYc3La)|44.6MB|[pytorch-cls](https://github.com/soeaver/pytorch-classification) 72 | resnext26-32x4d-priv | 8.53/10.12ms | 10.55/13.46ms | [58.9MB](https://pan.baidu.com/s/1dFzmUOh)|[58.9MB](https://drive.google.com/open?id=0B9mkjlmP0d7zZEh4dzZ3TVZUb2M)|[pytorch-cls](https://github.com/soeaver/pytorch-classification) 73 | resnet101-v2| 22.31/22.75ms | 26.02/29.50ms | [170.3MB](https://pan.baidu.com/s/1kVQDHFx)|[170.3MB](https://drive.google.com/open?id=0B9mkjlmP0d7zRlhISks0VktGOGs)|[craftGBD](https://github.com/craftGBD/craftGBD) 74 | resnet152-v2| 32.11/32.54ms | 37.46/41.84ms | [230.2MB](https://pan.baidu.com/s/1dFIc4vB)|[230.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOXhrb1EyYVRHOEk)|[craftGBD](https://github.com/craftGBD/craftGBD) 75 | resnet269-v2| 58.20/59.15ms | 69.43/77.26ms | [390.4MB](https://pan.baidu.com/s/1qYbICs0)|[390.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOGFxcTMySHN6bUE)|[craftGBD](https://github.com/craftGBD/craftGBD) 76 | inception-v3| 21.79/19.82ms | 22.14/24.88ms | [91.1MB](https://pan.baidu.com/s/1boC0HEf)|[91.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTEJmNEh6c0RfYzg)|[mxnet](https://github.com/dmlc/mxnet-model-gallery/blob/master/imagenet-1k-inception-v3.md) 77 | xception | 14.03/30.39ms | 19.46/48.64ms | [87.4MB](https://pan.baidu.com/s/1gfiTShd)|87.4MB|[keras-models](https://github.com/fchollet/deep-learning-models) 78 | inception-v4| 32.96/32.19ms | 36.04/41.91ms | [163.1MB](https://pan.baidu.com/s/1c6D150)|[163.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zUEJ3aEJ2b3J0RFU)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim) 79 | inception-resnet-v2| 49.06/54.83ms | 54.06/66.38ms | [213.4MB](https://pan.baidu.com/s/1jHPJCX4)|[213.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zc3A4NWlQQzdoM28)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim) 80 | resnext50-32x4d| 17.29/20.08ms | 19.02/23.81ms | [95.8MB](https://pan.baidu.com/s/1kVqgfJL)|[95.8MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYVgwanhVWnhrYlE)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 81 | resnext101-32x4d| 30.73/35.75ms | 34.33/41.02ms | [169.1MB](https://pan.baidu.com/s/1hswrNUG)|[169.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTzYyelgyYlpOU3c)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 82 | resnext101-64x4d| 42.07/64.58ms | 51.99/77.71ms | [319.2MB](https://pan.baidu.com/s/1pLhk0Zp)|[319.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zQ0ZZOENnSFdQWnc)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 83 | wrn50-2<br/>(resnet50_1x128d)| 16.48/25.28ms | 20.99/35.04ms | [263.1MB](https://pan.baidu.com/s/1nvhoCsh)|[263.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYW40dUMxS3VPclU)|[szagoruyko](https://github.com/szagoruyko/wide-residual-networks) 84 | airx50-24x4d| 23.59/24.80ms | 26.64/30.92ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 85 | air101| 35.78/35.94ms | 39.69/45.52ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 86 | airx101-32x4d| 49.43/55.52ms | 54.64/66.31ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 87 | dpn-68| ../..ms | ../..ms | [48.4MB](https://pan.baidu.com/s/1bphINV5) | .. |[DPNs](https://github.com/cypw/DPNs) 88 | dpn-92| 29.71/30.68ms | 35.19/37.13ms | [144.2MB](https://pan.baidu.com/s/1pL0VuWV)|[144.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zaWVKWFd2OXpRTVU)|[DPNs](https://github.com/cypw/DPNs) 89 | dpn-98| 36.24/44.06ms | 42.84/53.50ms | [235.6MB](https://pan.baidu.com/s/1pKHBRlD) | .. |[DPNs](https://github.com/cypw/DPNs) 90 | dpn-107| 45.21/59.77ms | 56.12/77.78ms | [332.4MB](https://pan.baidu.com/s/1i5b0Uih) | .. |[DPNs](https://github.com/cypw/DPNs) 91 | dpn-131| 48.20/59.43ms | 57.66/72.43ms | [303.3MB](https://pan.baidu.com/s/1miOdMHi) | .. |[DPNs](https://github.com/cypw/DPNs) 92 | se-inception-v2| 14.66/10.63ms | 15.71/13.52ms | .. | .. |[senet](https://github.com/hujie-frank/SENet) 93 | se-resnet50| 15.29/14.20ms | 17.96/19.69ms | .. | .. |[senet](https://github.com/hujie-frank/SENet) 94 | 95 | - To speed up xception, we adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files). 96 | 97 | ### Check the performance 98 | **1. Download the ILSVRC 2012 classification val set [6.3GB](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar), and put the extracted images into the directory:** 99 | 100 | ~/Database/ILSVRC2012 101 | 102 | **2. Modify the parameter settings** 103 | 104 | Network|val_file|mean_value|std 105 | :---:|:---:|:---:|:---: 106 | resnet-v2(101/152/269)| ILSVRC2012_val | [102.98, 115.947, 122.772] | [1.0, 1.0, 1.0] 107 | resnet10/18, resnext, air(x) | ILSVRC2012_val | [103.52, 116.28, 123.675] | [57.375, 57.12, 58.395] 108 | inception-v3| **ILSVRC2015_val** | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0] 109 | inception-v2, xception<br/>inception-v4, inception-resnet-v2 | ILSVRC2012_val | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0] 110 | dpn(68/92/98/131/107)| ILSVRC2012_val | [104.0, 117.0, 124.0] | [59.88, 59.88, 59.88] 111 | official senet| **ILSVRC2015_val** | [104.0, 117.0, 123.0] | [1.0, 1.0, 1.0] 112 | 113 | 114 | **3. Then run evaluation_cls.py** 115 | 116 | python evaluation_cls.py
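For reference, the settings above amount to the following input preprocessing (a minimal sketch of ours, not the repository's code; it assumes OpenCV-style BGR loading as in evaluation_cls.py, and `preprocess` is an illustrative name):

```python
import cv2
import numpy as np

# Values from the table in step 2, e.g. the resnet-v2(101/152/269) row.
MEAN = np.array([102.98, 115.947, 122.772], dtype=np.float32)  # B, G, R
STD = np.array([1.0, 1.0, 1.0], dtype=np.float32)

def preprocess(path, crop_size=224, base_size=256):
    """Resize the short side to base_size, center-crop, and normalize per channel."""
    img = cv2.imread(path).astype(np.float32)              # BGR, HxWx3
    h, w = img.shape[:2]
    ratio = float(base_size) / min(h, w)
    img = cv2.resize(img, (int(round(w * ratio)), int(round(h * ratio))))
    off_h = (img.shape[0] - crop_size) // 2
    off_w = (img.shape[1] - crop_size) // 2
    img = img[off_h:off_h + crop_size, off_w:off_w + crop_size]
    img = (img - MEAN) / STD                               # (x - mean) / std
    return img.transpose((2, 0, 1))[np.newaxis]            # 1x3xHxW "data" blob
```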
-------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res31_scale" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | 16 | #============== RCNN =============== 17 | layer { 18 | name: "roi_pool" 19 | type: "ROIPooling" 20 | bottom: "res31_scale" 21 | bottom: "rois" 22 | top: "roi_pool" 23 | roi_pooling_param { 24 | pooled_w: 14 25 | pooled_h: 14 26 | spatial_scale: 0.062500 27 | } 28 | } 29 | 30 | layer { 31 | name: "res31_conv1" 32 | type: "Convolution" 33 | bottom: "roi_pool" 34 | top: "res31_conv1" 35 | param { 36 | lr_mult: 1 37 | decay_mult: 1 38 | } 39 | convolution_param { 40 | bias_term: false 41 | num_output: 512 42 | pad: 0 43 | kernel_size: 1 44 | stride: 1 45 | } 46 | } 47 | 48 | layer { 49 | name: "res31_conv1_scale" 50 | type: "Scale" 51 | bottom: "res31_conv1" 52 | top: "res31_conv1" 53 | scale_param { 54 | bias_term: true 55 | } 56 | param { 57 | lr_mult: 0.0 58 | decay_mult: 0.0 59 | } 60 | param { 61 | lr_mult: 0.0 62 | decay_mult: 0.0 63 | } 64 | } 65 | layer { 66 | name: "res31_conv1_relu" 67 | type: "ReLU" 68 | bottom: "res31_conv1" 69 | top: "res31_conv1" 70 | } 71 | layer { 72 | name: "res31_conv2" 73 | type: "Convolution" 74 | bottom: "res31_conv1" 75 | top: "res31_conv2" 76 | param { 77 | lr_mult: 1 78 | decay_mult: 1 79 | } 80 | convolution_param { 81 | bias_term: false 82 | num_output: 512 83 | pad: 1 84 | kernel_size: 3 85 | stride: 2 86 | } 87 | } 88 | 89 | layer { 90 | name: "res31_conv2_scale" 91 | type: "Scale" 92 | bottom: "res31_conv2" 93 | top: "res31_conv2" 94 | scale_param { 95 | bias_term: true 96 | } 97 | param { 98 | lr_mult: 0.0 99 | decay_mult: 0.0 100 | } 101 | param { 102 | lr_mult: 0.0 103 | decay_mult: 0.0 104 | } 105 | } 106 | layer { 107 | name: "res31_conv2_relu" 108 | type: "ReLU" 109 | bottom: "res31_conv2" 110 | top: "res31_conv2" 111 | } 112 | layer { 113 | name: "res31_conv3" 114 | type: "Convolution" 115 | bottom: "res31_conv2" 116 | top: "res31_conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | convolution_param { 122 | bias_term: false 123 | num_output: 2048 124 | pad: 0 125 | kernel_size: 1 126 | stride: 1 127 | } 128 | } 129 | layer { 130 | name: "res31_match_conv" 131 | type: "Convolution" 132 | bottom: "roi_pool" 133 | top: "res31_match_conv" 134 | param { 135 | lr_mult: 1 136 | decay_mult: 1 137 | } 138 | convolution_param { 139 | bias_term: false 140 | num_output: 2048 141 | pad: 0 142 | kernel_size: 1 143 | stride: 2 144 | } 145 | } 146 | layer { 147 | name: "res31_eletwise" 148 | type: "Eltwise" 149 | bottom: "res31_match_conv" 150 | bottom: "res31_conv3" 151 | top: "res31_eletwise" 152 | eltwise_param { 153 | operation: SUM 154 | } 155 | } 156 | 157 | layer { 158 | name: "res32_scale" 159 | type: "Scale" 160 | bottom: "res31_eletwise" 161 | top: "res32_scale" 162 | scale_param { 163 | bias_term: true 164 |
} 165 | param { 166 | lr_mult: 0.0 167 | decay_mult: 0.0 168 | } 169 | param { 170 | lr_mult: 0.0 171 | decay_mult: 0.0 172 | } 173 | } 174 | layer { 175 | name: "res32_relu" 176 | type: "ReLU" 177 | bottom: "res32_scale" 178 | top: "res32_scale" 179 | } 180 | layer { 181 | name: "res32_conv1" 182 | type: "Convolution" 183 | bottom: "res32_scale" 184 | top: "res32_conv1" 185 | param { 186 | lr_mult: 1 187 | decay_mult: 1 188 | } 189 | convolution_param { 190 | bias_term: false 191 | num_output: 512 192 | pad: 0 193 | kernel_size: 1 194 | stride: 1 195 | } 196 | } 197 | 198 | layer { 199 | name: "res32_conv1_scale" 200 | type: "Scale" 201 | bottom: "res32_conv1" 202 | top: "res32_conv1" 203 | scale_param { 204 | bias_term: true 205 | } 206 | param { 207 | lr_mult: 0.0 208 | decay_mult: 0.0 209 | } 210 | param { 211 | lr_mult: 0.0 212 | decay_mult: 0.0 213 | } 214 | } 215 | layer { 216 | name: "res32_conv1_relu" 217 | type: "ReLU" 218 | bottom: "res32_conv1" 219 | top: "res32_conv1" 220 | } 221 | layer { 222 | name: "res32_conv2" 223 | type: "Convolution" 224 | bottom: "res32_conv1" 225 | top: "res32_conv2" 226 | param { 227 | lr_mult: 1 228 | decay_mult: 1 229 | } 230 | convolution_param { 231 | bias_term: false 232 | num_output: 512 233 | pad: 1 234 | kernel_size: 3 235 | stride: 1 236 | } 237 | } 238 | 239 | layer { 240 | name: "res32_conv2_scale" 241 | type: "Scale" 242 | bottom: "res32_conv2" 243 | top: "res32_conv2" 244 | scale_param { 245 | bias_term: true 246 | } 247 | param { 248 | lr_mult: 0.0 249 | decay_mult: 0.0 250 | } 251 | param { 252 | lr_mult: 0.0 253 | decay_mult: 0.0 254 | } 255 | } 256 | layer { 257 | name: "res32_conv2_relu" 258 | type: "ReLU" 259 | bottom: "res32_conv2" 260 | top: "res32_conv2" 261 | } 262 | layer { 263 | name: "res32_conv3" 264 | type: "Convolution" 265 | bottom: "res32_conv2" 266 | top: "res32_conv3" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | convolution_param { 272 | bias_term: false 273 | num_output: 2048 274 | pad: 0 275 | kernel_size: 1 276 | stride: 1 277 | } 278 | } 279 | layer { 280 | name: "res32_eletwise" 281 | type: "Eltwise" 282 | bottom: "res31_eletwise" 283 | bottom: "res32_conv3" 284 | top: "res32_eletwise" 285 | eltwise_param { 286 | operation: SUM 287 | } 288 | } 289 | 290 | layer { 291 | name: "res33_scale" 292 | type: "Scale" 293 | bottom: "res32_eletwise" 294 | top: "res33_scale" 295 | scale_param { 296 | bias_term: true 297 | } 298 | param { 299 | lr_mult: 0.0 300 | decay_mult: 0.0 301 | } 302 | param { 303 | lr_mult: 0.0 304 | decay_mult: 0.0 305 | } 306 | } 307 | layer { 308 | name: "res33_relu" 309 | type: "ReLU" 310 | bottom: "res33_scale" 311 | top: "res33_scale" 312 | } 313 | layer { 314 | name: "res33_conv1" 315 | type: "Convolution" 316 | bottom: "res33_scale" 317 | top: "res33_conv1" 318 | param { 319 | lr_mult: 1 320 | decay_mult: 1 321 | } 322 | convolution_param { 323 | bias_term: false 324 | num_output: 512 325 | pad: 0 326 | kernel_size: 1 327 | stride: 1 328 | } 329 | } 330 | 331 | layer { 332 | name: "res33_conv1_scale" 333 | type: "Scale" 334 | bottom: "res33_conv1" 335 | top: "res33_conv1" 336 | scale_param { 337 | bias_term: true 338 | } 339 | param { 340 | lr_mult: 0.0 341 | decay_mult: 0.0 342 | } 343 | param { 344 | lr_mult: 0.0 345 | decay_mult: 0.0 346 | } 347 | } 348 | layer { 349 | name: "res33_conv1_relu" 350 | type: "ReLU" 351 | bottom: "res33_conv1" 352 | top: "res33_conv1" 353 | } 354 | layer { 355 | name: "res33_conv2" 356 | type: "Convolution" 357 | bottom: "res33_conv1" 358 | 
top: "res33_conv2" 359 | param { 360 | lr_mult: 1 361 | decay_mult: 1 362 | } 363 | convolution_param { 364 | bias_term: false 365 | num_output: 512 366 | pad: 1 367 | kernel_size: 3 368 | stride: 1 369 | } 370 | } 371 | 372 | layer { 373 | name: "res33_conv2_scale" 374 | type: "Scale" 375 | bottom: "res33_conv2" 376 | top: "res33_conv2" 377 | scale_param { 378 | bias_term: true 379 | } 380 | param { 381 | lr_mult: 0.0 382 | decay_mult: 0.0 383 | } 384 | param { 385 | lr_mult: 0.0 386 | decay_mult: 0.0 387 | } 388 | } 389 | layer { 390 | name: "res33_conv2_relu" 391 | type: "ReLU" 392 | bottom: "res33_conv2" 393 | top: "res33_conv2" 394 | } 395 | layer { 396 | name: "res33_conv3" 397 | type: "Convolution" 398 | bottom: "res33_conv2" 399 | top: "res33_conv3" 400 | param { 401 | lr_mult: 1 402 | decay_mult: 1 403 | } 404 | convolution_param { 405 | bias_term: false 406 | num_output: 2048 407 | pad: 0 408 | kernel_size: 1 409 | stride: 1 410 | } 411 | } 412 | layer { 413 | name: "res33_eletwise" 414 | type: "Eltwise" 415 | bottom: "res32_eletwise" 416 | bottom: "res33_conv3" 417 | top: "res33_eletwise" 418 | eltwise_param { 419 | operation: SUM 420 | } 421 | } 422 | 423 | layer { 424 | name: "res33_eletwise_scale" 425 | type: "Scale" 426 | bottom: "res33_eletwise" 427 | top: "res33_eletwise_scale" 428 | scale_param { 429 | bias_term: true 430 | } 431 | param { 432 | lr_mult: 0.0 433 | decay_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | decay_mult: 0.0 438 | } 439 | } 440 | layer { 441 | name: "res33_eletwise_relu" 442 | type: "ReLU" 443 | bottom: "res33_eletwise_scale" 444 | top: "res33_eletwise_scale" 445 | } 446 | 447 | layer { 448 | bottom: "res33_eletwise_scale" 449 | top: "pool5" 450 | name: "pool5" 451 | type: "Pooling" 452 | pooling_param { 453 | pool: AVE 454 | global_pooling: true 455 | } 456 | } 457 | layer { 458 | name: "cls_score" 459 | type: "InnerProduct" 460 | bottom: "pool5" 461 | top: "cls_score" 462 | param { 463 | lr_mult: 1 464 | decay_mult: 1 465 | } 466 | param { 467 | lr_mult: 2 468 | decay_mult: 0 469 | } 470 | inner_product_param { 471 | num_output: 21 472 | weight_filler { 473 | type: "msra" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | value: 0 479 | } 480 | } 481 | } 482 | layer { 483 | name: "bbox_pred" 484 | type: "InnerProduct" 485 | bottom: "pool5" 486 | top: "bbox_pred" 487 | param { 488 | lr_mult: 1 489 | decay_mult: 1 490 | } 491 | param { 492 | lr_mult: 2 493 | decay_mult: 0 494 | } 495 | inner_product_param { 496 | num_output: 84 497 | weight_filler { 498 | type: "msra" 499 | std: 0.01 500 | } 501 | bias_filler { 502 | type: "constant" 503 | value: 0 504 | } 505 | } 506 | } 507 | layer { 508 | name: "cls_prob" 509 | type: "Softmax" 510 | bottom: "cls_score" 511 | top: "cls_prob" 512 | } 513 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet152-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res48_scale" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res48_scale" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer 
{ 29 | name: "res48_conv1" 30 | type: "Convolution" 31 | bottom: "roi_pool" 32 | top: "res48_conv1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | bias_term: false 39 | num_output: 512 40 | pad: 0 41 | kernel_size: 1 42 | stride: 1 43 | } 44 | } 45 | 46 | layer { 47 | name: "res48_conv1_scale" 48 | type: "Scale" 49 | bottom: "res48_conv1" 50 | top: "res48_conv1" 51 | scale_param { 52 | bias_term: true 53 | } 54 | param { 55 | lr_mult: 0.0 56 | decay_mult: 0.0 57 | } 58 | param { 59 | lr_mult: 0.0 60 | decay_mult: 0.0 61 | } 62 | } 63 | layer { 64 | name: "res48_conv1_relu" 65 | type: "ReLU" 66 | bottom: "res48_conv1" 67 | top: "res48_conv1" 68 | } 69 | layer { 70 | name: "res48_conv2" 71 | type: "Convolution" 72 | bottom: "res48_conv1" 73 | top: "res48_conv2" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | bias_term: false 80 | num_output: 512 81 | pad: 1 82 | kernel_size: 3 83 | stride: 2 84 | } 85 | } 86 | 87 | layer { 88 | name: "res48_conv2_scale" 89 | type: "Scale" 90 | bottom: "res48_conv2" 91 | top: "res48_conv2" 92 | scale_param { 93 | bias_term: true 94 | } 95 | param { 96 | lr_mult: 0.0 97 | decay_mult: 0.0 98 | } 99 | param { 100 | lr_mult: 0.0 101 | decay_mult: 0.0 102 | } 103 | } 104 | layer { 105 | name: "res48_conv2_relu" 106 | type: "ReLU" 107 | bottom: "res48_conv2" 108 | top: "res48_conv2" 109 | } 110 | layer { 111 | name: "res48_conv3" 112 | type: "Convolution" 113 | bottom: "res48_conv2" 114 | top: "res48_conv3" 115 | param { 116 | lr_mult: 1 117 | decay_mult: 1 118 | } 119 | convolution_param { 120 | bias_term: false 121 | num_output: 2048 122 | pad: 0 123 | kernel_size: 1 124 | stride: 1 125 | } 126 | } 127 | layer { 128 | name: "res48_match_conv" 129 | type: "Convolution" 130 | bottom: "roi_pool" 131 | top: "res48_match_conv" 132 | param { 133 | lr_mult: 1 134 | decay_mult: 1 135 | } 136 | convolution_param { 137 | bias_term: false 138 | num_output: 2048 139 | pad: 0 140 | kernel_size: 1 141 | stride: 2 142 | bias_filler { 143 | type: "constant" 144 | value: 0.2 145 | } 146 | } 147 | } 148 | layer { 149 | name: "res48_eletwise" 150 | type: "Eltwise" 151 | bottom: "res48_match_conv" 152 | bottom: "res48_conv3" 153 | top: "res48_eletwise" 154 | eltwise_param { 155 | operation: SUM 156 | } 157 | } 158 | 159 | layer { 160 | name: "res49_scale" 161 | type: "Scale" 162 | bottom: "res48_eletwise" 163 | top: "res49_scale" 164 | scale_param { 165 | bias_term: true 166 | } 167 | param { 168 | lr_mult: 0.0 169 | decay_mult: 0.0 170 | } 171 | param { 172 | lr_mult: 0.0 173 | decay_mult: 0.0 174 | } 175 | } 176 | layer { 177 | name: "res49_relu" 178 | type: "ReLU" 179 | bottom: "res49_scale" 180 | top: "res49_scale" 181 | } 182 | layer { 183 | name: "res49_conv1" 184 | type: "Convolution" 185 | bottom: "res49_scale" 186 | top: "res49_conv1" 187 | param { 188 | lr_mult: 1 189 | decay_mult: 1 190 | } 191 | convolution_param { 192 | bias_term: false 193 | num_output: 512 194 | pad: 0 195 | kernel_size: 1 196 | stride: 1 197 | } 198 | } 199 | 200 | layer { 201 | name: "res49_conv1_scale" 202 | type: "Scale" 203 | bottom: "res49_conv1" 204 | top: "res49_conv1" 205 | scale_param { 206 | bias_term: true 207 | } 208 | param { 209 | lr_mult: 0.0 210 | decay_mult: 0.0 211 | } 212 | param { 213 | lr_mult: 0.0 214 | decay_mult: 0.0 215 | } 216 | } 217 | layer { 218 | name: "res49_conv1_relu" 219 | type: "ReLU" 220 | bottom: "res49_conv1" 221 | top: "res49_conv1" 222 | } 223 | layer { 224 | name: "res49_conv2" 225 | 
type: "Convolution" 226 | bottom: "res49_conv1" 227 | top: "res49_conv2" 228 | param { 229 | lr_mult: 1 230 | decay_mult: 1 231 | } 232 | convolution_param { 233 | bias_term: false 234 | num_output: 512 235 | pad: 1 236 | kernel_size: 3 237 | stride: 1 238 | } 239 | } 240 | layer { 241 | name: "res49_conv2_scale" 242 | type: "Scale" 243 | bottom: "res49_conv2" 244 | top: "res49_conv2" 245 | scale_param { 246 | bias_term: true 247 | } 248 | param { 249 | lr_mult: 0.0 250 | decay_mult: 0.0 251 | } 252 | param { 253 | lr_mult: 0.0 254 | decay_mult: 0.0 255 | } 256 | } 257 | layer { 258 | name: "res49_conv2_relu" 259 | type: "ReLU" 260 | bottom: "res49_conv2" 261 | top: "res49_conv2" 262 | } 263 | layer { 264 | name: "res49_conv3" 265 | type: "Convolution" 266 | bottom: "res49_conv2" 267 | top: "res49_conv3" 268 | param { 269 | lr_mult: 1 270 | decay_mult: 1 271 | } 272 | convolution_param { 273 | bias_term: false 274 | num_output: 2048 275 | pad: 0 276 | kernel_size: 1 277 | stride: 1 278 | } 279 | } 280 | layer { 281 | name: "res49_eletwise" 282 | type: "Eltwise" 283 | bottom: "res48_eletwise" 284 | bottom: "res49_conv3" 285 | top: "res49_eletwise" 286 | eltwise_param { 287 | operation: SUM 288 | } 289 | } 290 | 291 | layer { 292 | name: "res50_scale" 293 | type: "Scale" 294 | bottom: "res49_eletwise" 295 | top: "res50_scale" 296 | scale_param { 297 | bias_term: true 298 | } 299 | param { 300 | lr_mult: 0.0 301 | decay_mult: 0.0 302 | } 303 | param { 304 | lr_mult: 0.0 305 | decay_mult: 0.0 306 | } 307 | } 308 | layer { 309 | name: "res50_relu" 310 | type: "ReLU" 311 | bottom: "res50_scale" 312 | top: "res50_scale" 313 | } 314 | layer { 315 | name: "res50_conv1" 316 | type: "Convolution" 317 | bottom: "res50_scale" 318 | top: "res50_conv1" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 1 322 | } 323 | convolution_param { 324 | bias_term: false 325 | num_output: 512 326 | pad: 0 327 | kernel_size: 1 328 | stride: 1 329 | } 330 | } 331 | 332 | layer { 333 | name: "res50_conv1_scale" 334 | type: "Scale" 335 | bottom: "res50_conv1" 336 | top: "res50_conv1" 337 | scale_param { 338 | bias_term: true 339 | } 340 | param { 341 | lr_mult: 0.0 342 | decay_mult: 0.0 343 | } 344 | param { 345 | lr_mult: 0.0 346 | decay_mult: 0.0 347 | } 348 | } 349 | layer { 350 | name: "res50_conv1_relu" 351 | type: "ReLU" 352 | bottom: "res50_conv1" 353 | top: "res50_conv1" 354 | } 355 | layer { 356 | name: "res50_conv2" 357 | type: "Convolution" 358 | bottom: "res50_conv1" 359 | top: "res50_conv2" 360 | param { 361 | lr_mult: 1 362 | decay_mult: 1 363 | } 364 | convolution_param { 365 | bias_term: false 366 | num_output: 512 367 | pad: 1 368 | kernel_size: 3 369 | stride: 1 370 | } 371 | } 372 | 373 | layer { 374 | name: "res50_conv2_scale" 375 | type: "Scale" 376 | bottom: "res50_conv2" 377 | top: "res50_conv2" 378 | scale_param { 379 | bias_term: true 380 | } 381 | param { 382 | lr_mult: 0.0 383 | decay_mult: 0.0 384 | } 385 | param { 386 | lr_mult: 0.0 387 | decay_mult: 0.0 388 | } 389 | } 390 | layer { 391 | name: "res50_conv2_relu" 392 | type: "ReLU" 393 | bottom: "res50_conv2" 394 | top: "res50_conv2" 395 | } 396 | layer { 397 | name: "res50_conv3" 398 | type: "Convolution" 399 | bottom: "res50_conv2" 400 | top: "res50_conv3" 401 | param { 402 | lr_mult: 1 403 | decay_mult: 1 404 | } 405 | convolution_param { 406 | bias_term: false 407 | num_output: 2048 408 | pad: 0 409 | kernel_size: 1 410 | stride: 1 411 | } 412 | } 413 | layer { 414 | name: "res50_eletwise" 415 | type: "Eltwise" 416 | bottom: 
"res49_eletwise" 417 | bottom: "res50_conv3" 418 | top: "res50_eletwise" 419 | eltwise_param { 420 | operation: SUM 421 | } 422 | } 423 | 424 | layer { 425 | name: "res50_eletwise_scale" 426 | type: "Scale" 427 | bottom: "res50_eletwise" 428 | top: "res50_eletwise_scale" 429 | scale_param { 430 | bias_term: true 431 | } 432 | param { 433 | lr_mult: 0.0 434 | decay_mult: 0.0 435 | } 436 | param { 437 | lr_mult: 0.0 438 | decay_mult: 0.0 439 | } 440 | } 441 | layer { 442 | name: "res50_eletwise_relu" 443 | type: "ReLU" 444 | bottom: "res50_eletwise_scale" 445 | top: "res50_eletwise_scale" 446 | } 447 | layer { 448 | name: "pool5" 449 | type: "Pooling" 450 | bottom: "res50_eletwise_scale" 451 | top: "pool5" 452 | pooling_param { 453 | pool: AVE 454 | global_pooling: true 455 | } 456 | } 457 | layer { 458 | name: "cls_score" 459 | type: "InnerProduct" 460 | bottom: "pool5" 461 | top: "cls_score" 462 | param { 463 | lr_mult: 1 464 | decay_mult: 1 465 | } 466 | param { 467 | lr_mult: 2 468 | decay_mult: 0 469 | } 470 | inner_product_param { 471 | num_output: 21 472 | weight_filler { 473 | type: "msra" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | value: 0 479 | } 480 | } 481 | } 482 | layer { 483 | name: "bbox_pred" 484 | type: "InnerProduct" 485 | bottom: "pool5" 486 | top: "bbox_pred" 487 | param { 488 | lr_mult: 1 489 | decay_mult: 1 490 | } 491 | param { 492 | lr_mult: 2 493 | decay_mult: 0 494 | } 495 | inner_product_param { 496 | num_output: 84 497 | weight_filler { 498 | type: "msra" 499 | std: 0.01 500 | } 501 | bias_filler { 502 | type: "constant" 503 | value: 0 504 | } 505 | } 506 | } 507 | layer { 508 | name: "cls_prob" 509 | type: "Softmax" 510 | bottom: "cls_score" 511 | top: "cls_prob" 512 | } 513 | 514 | --------------------------------------------------------------------------------