├── cls
│ ├── accuracy.png
│ ├── inception
│ │ └── README.md
│ ├── resnet-v2
│ │ └── README.md
│ ├── cls_lite
│ │ └── README.md
│ ├── vgg
│ │ ├── deploy_vgg13-pytorch.prototxt
│ │ ├── deploy_vgg16-5x.prototxt
│ │ ├── deploy_vgg16-dsd.prototxt
│ │ ├── deploy_vgg16-pytorch.prototxt
│ │ ├── deploy_vgg16-tf.prototxt
│ │ ├── deploy_vgg19-pytorch.prototxt
│ │ └── deploy_vgg13bn-pytorch.prototxt
│ ├── evaluation_cls.py
│ └── README.md
├── .gitmodules
├── det
│ ├── rfcn
│ │ ├── README.md
│ │ ├── models
│ │ │ ├── pascal_voc
│ │ │ │ ├── solver.prototxt
│ │ │ │ ├── air101
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_air101-merge.prototxt
│ │ │ │ ├── resnet101-v2
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt
│ │ │ │ ├── resnet18
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt
│ │ │ │ ├── inception-v4
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt
│ │ │ │ ├── resnext101-32x4d
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt
│ │ │ │ ├── resnext101-64x4d
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt
│ │ │ │ ├── se-inception-v2
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt
│ │ │ │ └── resnext26-32x4d
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt
│ │ │ └── coco
│ │ │ │ ├── air101
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_coco_air101-merge.prototxt
│ │ │ │ ├── inception-v4
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt
│ │ │ │ └── resnext101-32x4d
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt
│ │ ├── experiments
│ │ │ └── cfgs
│ │ │ │ ├── rfcn_end2end.yml
│ │ │ │ └── rfcn_end2end_ohem.yml
│ │ └── tools
│ │ │ ├── train_net_multi_gpu.py
│ │ │ └── score.py
│ ├── faster_rcnn
│ │ ├── models
│ │ │ ├── pascal_voc
│ │ │ │ ├── solver.prototxt
│ │ │ │ ├── airx101-32x4d
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt
│ │ │ │ ├── 2007test400.txt
│ │ │ │ ├── resnet18
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt
│ │ │ │ ├── mobilenet
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_mobilenet-dw.prototxt
│ │ │ │ ├── xception
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt
│ │ │ │ ├── resnet38a
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet38a-merge.prototxt
│ │ │ │ ├── resnet101-v2
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt
│ │ │ │ └── resnet152-v2
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt
│ │ │ └── coco
│ │ │ │ ├── air101
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt
│ │ │ │ └── inception-v4
│ │ │ │ │ └── rpn_rcnn_deploys
│ │ │ │ │ │ └── rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt
│ │ ├── experiments
│ │ │ └── cfgs
│ │ │ │ ├── faster_rcnn_end2end.yml
│ │ │ │ └── faster_rcnn_end2end_ohem.yml
│ │ ├── README.md
│ │ └── tools
│ │ │ ├── train_net_multi_gpu.py
│ │ │ └── score.py
│ ├── MSCOCO_Benchmark.md
│ ├── README.md
│ └── VOC_Benchmark.md
├── LICENSE
├── seg
│ ├── pspnet
│ │ └── tools
│ │ │ ├── train_net_multi.py
│ │ │ └── image_seg_data.py
│ ├── score_seg.py
│ ├── README.md
│ └── evaluation_seg.py
└── README.md
/cls/accuracy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/caffe-model/HEAD/cls/accuracy.png
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pypriv"]
2 | path = pypriv
3 | url = https://github.com/soeaver/pypriv.git
4 |
--------------------------------------------------------------------------------
/det/rfcn/README.md:
--------------------------------------------------------------------------------
1 | ## R-FCN
2 | ### Training R-FCN networks on PASCAL VOC
3 |
4 | 1. Download the network weights pre-trained on ImageNet.
5 |
--------------------------------------------------------------------------------
/cls/inception/README.md:
--------------------------------------------------------------------------------
1 | ### Note
2 | We adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files) in deploy_xception-dw.prototxt to speed up inference.
3 |
--------------------------------------------------------------------------------
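A back-of-the-envelope sketch (not from the repo) of why the depthwise + pointwise factorization referenced above is faster than a dense convolution; the layer sizes are illustrative assumptions:

```
# Multiply-accumulate counts for one 3x3 conv layer (illustrative sizes).
h = w = 28          # feature map resolution (assumed)
cin = cout = 256    # channels (assumed)
k = 3               # kernel size

standard  = h * w * cout * cin * k * k   # dense 3x3 convolution
depthwise = h * w * cin * k * k          # one 3x3 filter per input channel
pointwise = h * w * cout * cin           # 1x1 conv to mix channels

print('speedup ~ %.1fx' % (float(standard) / (depthwise + pointwise)))  # ~8.7x
```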
/det/rfcn/models/pascal_voc/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "./rfcn_voc_resnet101-v2-merge.prototxt"
2 | base_lr: 0.001
3 | lr_policy: "multistep"
4 | gamma: 0.1
5 | # stepsize: 30000
6 | stepvalue: 80000
7 | # stepvalue: 70000
8 | display: 20
9 | average_loss: 100
10 | # iter_size: 1
11 | momentum: 0.9
12 | weight_decay: 0.0001
13 | # We disable standard caffe solver snapshotting and implement our own snapshot
14 | # function
15 | snapshot: 0
16 | # We still use the snapshot prefix, though
17 | snapshot_prefix: "rfcn_voc_resnet101-v2"
18 | iter_size: 1
19 |
--------------------------------------------------------------------------------
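For reference, a minimal sketch of how Caffe's `multistep` policy behaves with the values in this solver (base_lr=0.001, gamma=0.1, stepvalue=80000); this mirrors the solver semantics rather than any repo code:

```
base_lr, gamma = 0.001, 0.1
stepvalues = [80000]

def lr_at(it):
    # multistep: multiply by gamma each time the iteration passes a stepvalue
    return base_lr * gamma ** sum(it >= s for s in stepvalues)

print(lr_at(0))      # 0.001
print(lr_at(80000))  # 0.0001 (10x drop at iteration 80k)
```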
/det/faster_rcnn/models/pascal_voc/solver.prototxt:
--------------------------------------------------------------------------------
1 | train_net: "~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2/faster_voc_resnet101-v2-merge.prototxt"  # set to your absolute path; '~' is not expanded by caffe
2 | base_lr: 0.001
3 | lr_policy: "multistep"
4 | gamma: 0.1
5 | stepvalue: 50000
6 | display: 20
7 | average_loss: 100
8 |
9 | momentum: 0.9
10 | weight_decay: 0.0001
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "faster_voc_resnet101-v2"
16 | iter_size: 1
17 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml:
--------------------------------------------------------------------------------
1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2
2 | EXP_DIR: faster_rcnn_end2end
3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]]
4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]]
5 | TRAIN:
6 | SNAPSHOT_INFIX: 'ss'
7 | SNAPSHOT_ITERS: 10000
8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training
9 | SCALES: [600]
10 | MAX_SIZE: 1000
11 | HAS_RPN: True
12 | IMS_PER_BATCH: 1
13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
14 | RPN_POSITIVE_OVERLAP: 0.7
15 | RPN_BATCHSIZE: 256
16 | BATCH_SIZE: 128
17 | PROPOSAL_METHOD: gt
18 | BG_THRESH_LO: 0.0
19 | ASPECT_GROUPING: True
20 | TEST:
21 | HAS_RPN: True
22 | SCALES: [600]
23 | MAX_SIZE: 1000
24 |
25 |
--------------------------------------------------------------------------------
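A simplified sketch of how py-faster-rcnn-style code consumes the PIXEL_MEANS, SCALES and MAX_SIZE values above (the exact py-RFCN-priv code path may differ):

```
import numpy as np

PIXEL_MEANS = np.array([[[102.98, 115.947, 122.772]]])  # BGR means, as in the yml
SCALES, MAX_SIZE = [600], 1000

def prep_im(im):
    im = im.astype(np.float32) - PIXEL_MEANS          # subtract channel means
    scale = float(SCALES[0]) / min(im.shape[:2])      # scale short side to 600
    if round(scale * max(im.shape[:2])) > MAX_SIZE:   # but cap long side at 1000
        scale = float(MAX_SIZE) / max(im.shape[:2])
    return im, scale                                  # caller resizes by `scale`
```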
/cls/resnet-v2/README.md:
--------------------------------------------------------------------------------
1 | ### Resnet-v2
2 |
3 | We have not yet finished the generator scripts for resnet-v2. In the meantime, [ResNet_with_IdentityMapping](https://github.com/MichaelHunson/ResNet_with_IdentityMapping) may be useful.
4 |
5 | The details are described in the paper **Identity Mappings in Deep Residual Networks** (https://arxiv.org/abs/1603.05027).
6 |
7 | The caffe models are converted from **craftGBD** (https://github.com/craftGBD/craftGBD).
8 | The craftGBD models use a modified BN layer; we manually converted the custom 'bn_layer' into the official 'batch_norm_layer' and 'scale_layer'.
9 |
10 | ### Notes
11 | - Thanks to **craftGBD** (https://github.com/craftGBD/craftGBD) for training the models.
12 | - Layer names differ slightly from the craftGBD version.
13 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end.yml:
--------------------------------------------------------------------------------
1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss
2 | EXP_DIR: rfcn_end2end
3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]]
4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]]
5 | TRAIN:
6 | SNAPSHOT_INFIX: 'ss'
7 | SNAPSHOT_ITERS: 10000
8 | # SCALES: [200, 400, 600, 800]
9 | SCALES: [600]
10 | MAX_SIZE: 1000
11 | HAS_RPN: True
12 | IMS_PER_BATCH: 1
13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
14 | RPN_POSITIVE_OVERLAP: 0.7
15 | # ONLY_INSIDE_ANCHORS: False
16 | RPN_BATCHSIZE: 256
17 | BATCH_SIZE: 128
18 | PROPOSAL_METHOD: gt
19 | BG_THRESH_LO: 0.1
20 | AGNOSTIC: True
21 | RPN_PRE_NMS_TOP_N: 6000
22 | RPN_POST_NMS_TOP_N: 300
23 | TEST:
24 | HAS_RPN: True
25 | SCALES: [600]
26 | MAX_SIZE: 1000
27 |
28 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end_ohem.yml:
--------------------------------------------------------------------------------
1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss-ohem
2 | EXP_DIR: rfcn_end2end
3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]]
4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]]
5 | TRAIN:
6 | SNAPSHOT_INFIX: 'ss-ohem'
7 | SNAPSHOT_ITERS: 10000
8 | # SCALES: [200, 400, 600, 800]
9 | SCALES: [600]
10 | MAX_SIZE: 1000
11 | HAS_RPN: True
12 | IMS_PER_BATCH: 1
13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
14 | RPN_POSITIVE_OVERLAP: 0.7
15 | RPN_NORMALIZE_TARGETS: True
16 | # ONLY_INSIDE_ANCHORS: False
17 | RPN_BATCHSIZE: 256
18 | BATCH_SIZE: -1
19 | PROPOSAL_METHOD: gt
20 | BG_THRESH_LO: 0.0
21 | AGNOSTIC: True
22 | RPN_PRE_NMS_TOP_N: 6000
23 | RPN_POST_NMS_TOP_N: 300
24 | TEST:
25 | HAS_RPN: True
26 | SCALES: [600]
27 | MAX_SIZE: 1000
28 |
29 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end_ohem.yml:
--------------------------------------------------------------------------------
1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2
2 | EXP_DIR: faster_rcnn_end2end
3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]]
4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]]
5 | TRAIN:
6 | SNAPSHOT_INFIX: 'ss-ohem'
7 | SNAPSHOT_ITERS: 10000
8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training
9 | SCALES: [600]
10 | MAX_SIZE: 1000
11 | HAS_RPN: True
12 | IMS_PER_BATCH: 1
13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
14 | RPN_POSITIVE_OVERLAP: 0.7
15 | RPN_NORMALIZE_TARGETS: True
16 | # ONLY_INSIDE_ANCHORS: False
17 | RPN_BATCHSIZE: 256
18 | BATCH_SIZE: -1
19 | RPN_PRE_NMS_TOP_N: 6000
20 | RPN_POST_NMS_TOP_N: 300
21 | PROPOSAL_METHOD: gt
22 | BG_THRESH_LO: 0.0
23 | TEST:
24 | HAS_RPN: True
25 | SCALES: [600]
26 | MAX_SIZE: 1000
27 |
28 |
--------------------------------------------------------------------------------
/det/faster_rcnn/README.md:
--------------------------------------------------------------------------------
1 | ## Faster R-CNN
2 | ### Training Faster R-CNN networks on PASCAL VOC
3 |
4 | 1. Download the network weights pre-trained on ImageNet.
5 |
6 |
7 | 2. Modify the solver file
8 | ```
9 | caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt
10 | ```
11 | - You need to modify 'train_net' and 'snapshot_prefix' to the correct path and name.
12 |
13 |
14 | 3. Modify the yml file
15 | ```
16 | caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml
17 | ```
18 | - The Faster R-CNN models will be saved in the '{ROOT_DIR}/output/{EXP_DIR}/{imdb.name}/' folder.
19 |
20 |
21 | 4. Training
22 | ```
23 | python train_net_multi_gpu.py --gpu_id 0,1 --solver ~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt --iters 80000 --weights ~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel --cfg ~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml --imdb voc_0712_trainval
24 | ```
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 soeaver Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/det/rfcn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_air101-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 3969
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 81
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
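The input shapes in the deploy above follow directly from the PSROIPooling parameters; a quick sanity check of the channel arithmetic (COCO numbers from this file, VOC in the comments):

```
num_classes = 81                 # COCO: 80 classes + background (21 for VOC)
group_size = 7                   # position-sensitive k x k grid
cls_channels = num_classes * group_size ** 2   # 81 * 49 = 3969 -> "rfcn_cls" (21 * 49 = 1029 for VOC)
loc_channels = 2 * 4 * group_size ** 2         # class-agnostic (bg/fg) * 4 coords * 49 = 392 -> "rfcn_bbox"
assert cls_channels == 3969 and loc_channels == 392
# spatial_scale 0.0625 = 1/16, i.e. the stride of the backbone feature map
```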
/det/rfcn/models/pascal_voc/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_air101-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 3969
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 81
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/coco/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 3969
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 81
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/resnext101-64x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/se-inception-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/resnext26-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "rfcn_cls"
2 | input_shape {
3 | dim: 1
4 | dim: 1029
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rfcn_bbox"
10 | input_shape {
11 | dim: 1
12 | dim: 392
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "rois"
18 | input_shape {
19 | dim: 300
20 | dim: 5
21 | }
22 |
23 |
24 | #======= position sensitive RoI pooling ========
25 | layer {
26 | bottom: "rfcn_cls"
27 | bottom: "rois"
28 | top: "psroipooled_cls_rois"
29 | name: "psroipooled_cls_rois"
30 | type: "PSROIPooling"
31 | psroi_pooling_param {
32 | spatial_scale: 0.0625
33 | output_dim: 21
34 | group_size: 7
35 | }
36 | }
37 | layer {
38 | bottom: "psroipooled_cls_rois"
39 | top: "cls_score"
40 | name: "ave_cls_score_rois"
41 | type: "Pooling"
42 | pooling_param {
43 | pool: AVE
44 | global_pooling: true
45 | }
46 | }
47 | layer {
48 | bottom: "rfcn_bbox"
49 | bottom: "rois"
50 | top: "psroipooled_loc_rois"
51 | name: "psroipooled_loc_rois"
52 | type: "PSROIPooling"
53 | psroi_pooling_param {
54 | spatial_scale: 0.0625
55 | output_dim: 8
56 | group_size: 7
57 | }
58 | }
59 | layer {
60 | bottom: "psroipooled_loc_rois"
61 | top: "bbox_pred"
62 | name: "ave_bbox_pred_rois"
63 | type: "Pooling"
64 | pooling_param {
65 | pool: AVE
66 | global_pooling: true
67 | }
68 | }
69 | layer {
70 | name: "cls_prob"
71 | type: "Softmax"
72 | bottom: "cls_score"
73 | top: "cls_prob"
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/seg/pspnet/tools/train_net_multi.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from multiprocessing import Process
3 |
4 | # sys.setrecursionlimit(100000)
5 |
6 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python')
7 | # sys.path.append('/home/yanglu/workspace/py-R-FCN-multiGPU-master-0619/caffe/python')
8 | import caffe
9 |
10 | # _snapshot='./aug_single_resnet101_iter_5000.solverstate'
11 | _weights = '/home/prmct/Program/classification/ilsvrc/resnet_v2/resnet101_v2/resnet101_v2_merge_bn_scale.caffemodel'
12 |
13 | solver_prototxt = './solver.prototxt'
14 | gpus = [0,1,2,3]
15 | max_iter = 200000
16 |
17 | def solve(proto, gpus, uid, rank, max_iter):
18 | caffe.set_mode_gpu()
19 | caffe.set_device(gpus[rank])
20 | caffe.set_solver_count(len(gpus))
21 | caffe.set_solver_rank(rank)
22 | caffe.set_multiprocess(True)
23 |
24 | solver = caffe.SGDSolver(proto)
25 | if rank == 0:
26 | # solver.restore(_snapshot)
27 | solver.net.copy_from(_weights)
28 |
29 | solver.net.layers[0].get_gpu_id(gpus[rank])  # presumably tells the custom data layer which GPU it feeds
30 |
31 | nccl = caffe.NCCL(solver, uid)
32 | nccl.bcast()
33 | solver.add_callback(nccl)
34 |
35 | if solver.param.layer_wise_reduce:
36 | solver.net.after_backward(nccl)
37 |
38 | for _ in range(max_iter):
39 | solver.step(1)
40 |
41 |
42 | if __name__ == '__main__':
43 | uid = caffe.NCCL.new_uid()
44 | caffe.init_log()
45 | caffe.log('Using devices %s' % str(gpus))
46 | procs = []
47 |
48 | for rank in range(len(gpus)):
49 | p = Process(target=solve,
50 | args=(solver_prototxt, gpus, uid, rank, max_iter))
51 | p.daemon = False
52 | p.start()
53 | procs.append(p)
54 | for p in procs:
55 | p.join()
56 |
57 |
--------------------------------------------------------------------------------
/seg/score_seg.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | gt_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/SegmentationClassAug/'
5 | pre_root = './predict/'
6 | val_pth = './val.txt'
7 | n_class = 21
8 |
9 |
10 | def fast_hist(a, b, n):
11 | k = (a >= 0) & (a < n)
12 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)
13 |
14 |
15 | def compute_hist(val_list):
16 | hist = np.zeros((n_class, n_class))
17 | for idx in val_list:
18 | print idx
19 | label = cv2.imread(gt_root + idx + '.png', 0)
20 | gt = label.flatten()
21 | tmp = cv2.imread(pre_root + idx + '.png', 0)
22 |
23 | if label.shape != tmp.shape:
24 | pre = cv2.resize(tmp, (label.shape[1], label.shape[0]), interpolation=cv2.cv.CV_INTER_NN)  # use cv2.INTER_NEAREST on OpenCV 3+
25 | pre = pre.flatten()
26 | else:
27 | pre = tmp.flatten()
28 |
29 | hist += fast_hist(gt, pre, n_class)
30 |
31 | # return hist[1:, 1:]
32 | return hist
33 |
34 |
35 | def mean_IoU(overall_h):
36 | iu = np.diag(overall_h) / (overall_h.sum(1) + overall_h.sum(0) - np.diag(overall_h))
37 | return np.nanmean(iu)
38 |
39 |
40 | def per_class_acc(overall_h):
41 | acc = np.diag(overall_h) / overall_h.sum(1)
42 | return np.nanmean(acc)
43 |
44 |
45 | def pixel_wise_acc(overall_h):
46 | return np.diag(overall_h).sum() / overall_h.sum()
47 |
48 |
49 | if __name__ == '__main__':
50 | val_list = []
51 |
52 | f = open(val_pth, 'r')
53 | for i in f:
54 | val_list.append(i.strip().split(' ')[-1].split('/')[-1])
55 |
56 | hist = compute_hist(val_list)
57 |
58 | print 'Mean IoU:', mean_IoU(hist)
59 | print 'Pixel Acc:', pixel_wise_acc(hist)
60 | print 'Mean Acc:', per_class_acc(hist)
61 |
62 | # print np.diag(hist).sum() / hist.sum()
63 |
--------------------------------------------------------------------------------
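A toy check of the scoring functions above on a two-class example (assumes `fast_hist` and `mean_IoU` from score_seg.py are in scope):

```
import numpy as np

gt  = np.array([0, 0, 1, 1])   # ground-truth labels
pre = np.array([0, 1, 1, 1])   # predicted labels
h = fast_hist(gt, pre, 2)      # rows = ground truth, cols = prediction
# class 0: intersection 1, union 2 -> IoU 0.5
# class 1: intersection 2, union 3 -> IoU 0.667
print(mean_IoU(h))             # ~0.583
```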
/seg/README.md:
--------------------------------------------------------------------------------
1 | ## Object Segmentation
2 |
3 | ### We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)
4 | We are releasing the training code and files; the models and more experiments will come soon.
5 |
6 | ### Object Segmentation Performance on PASCAL VOC.
7 | **1. PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).**
8 |
9 | Network|mIoU(%)|pixel acc(%)|training speed|training memory|testing speed|testing memory
10 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
11 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB
12 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB
13 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB
14 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB
15 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB
16 | se-resnet50| 75.80 | 94.30 | -- img/s | --MB | -- img/s | --MB
17 | - To reduce memory usage, we merge the batchnorm layer parameters of all models into the following scale layers; for more details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py);
18 | - The PSP module is used without batch normalization; the kernel sizes of its average pooling are 64, 32, 16 and 8 respectively;
19 | - All the models use 513x513 inputs with random cropping, multi-scale training (0.75x, 1.0x, 1.25x, 1.5x, 2.0x) and horizontal flipping;
20 | - The training and testing speed is measured on a single NVIDIA Titan (Pascal) GPU with batch_size=1;
21 | - Training uses batch_size=16 for 20,000 iterations, base_lr=0.001 with the 'poly' learning rate policy (power=0.9);
22 | - Testing uses a single scale, base_size=555 and crop_size=513, no flipping, no CRF;
23 |
--------------------------------------------------------------------------------
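The batchnorm-into-scale merge referenced in the notes above boils down to folding the BN statistics into a single affine transform; a minimal numpy sketch of the idea (not the repo's actual gen_merged_model.py):

```
import numpy as np

def merge_bn_into_scale(mean, var, gamma, beta, eps=1e-5):
    std = np.sqrt(var + eps)
    return gamma / std, beta - gamma * mean / std   # new scale weight, bias

x = np.random.randn(4, 8)                           # (batch, channels)
mean, var = x.mean(0), x.var(0)
gamma, beta = np.full(8, 1.5), np.full(8, 0.2)
s, b = merge_bn_into_scale(mean, var, gamma, beta)
bn_out = gamma * (x - mean) / np.sqrt(var + 1e-5) + beta
assert np.allclose(bn_out, x * s + b)               # merged form is equivalent
```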
/det/faster_rcnn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt:
--------------------------------------------------------------------------------
1 | input: "conv_new_1"
2 | input_shape {
3 | dim: 1
4 | dim: 256
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | bottom: "conv_new_1"
18 | bottom: "rois"
19 | top: "roi_pool"
20 | name: "roi_pool"
21 | type: "ROIPooling"
22 | roi_pooling_param {
23 | pooled_w: 7
24 | pooled_h: 7
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | name: "fc1"
30 | type: "InnerProduct"
31 | bottom: "roi_pool"
32 | top: "fc1"
33 | param {
34 | lr_mult: 1
35 | decay_mult: 1
36 | }
37 | param {
38 | lr_mult: 2
39 | decay_mult: 0
40 | }
41 | inner_product_param {
42 | num_output: 1024
43 | weight_filler {
44 | type: "gaussian"
45 | std: 0.01
46 | }
47 | bias_filler {
48 | type: "constant"
49 | value: 0
50 | }
51 | }
52 | }
53 | layer {
54 | name: "fc1_relu"
55 | type: "ReLU"
56 | bottom: "fc1"
57 | top: "fc1"
58 | }
59 | layer {
60 | name: "fc2"
61 | type: "InnerProduct"
62 | bottom: "fc1"
63 | top: "fc2"
64 | param {
65 | lr_mult: 1
66 | decay_mult: 1
67 | }
68 | param {
69 | lr_mult: 2
70 | decay_mult: 0
71 | }
72 | inner_product_param {
73 | num_output: 1024
74 | weight_filler {
75 | type: "gaussian"
76 | std: 0.01
77 | }
78 | bias_filler {
79 | type: "constant"
80 | value: 0
81 | }
82 | }
83 | }
84 | layer {
85 | name: "fc2_relu"
86 | type: "ReLU"
87 | bottom: "fc2"
88 | top: "fc2"
89 | }
90 | layer {
91 | name: "cls_score"
92 | type: "InnerProduct"
93 | bottom: "fc2"
94 | top: "cls_score"
95 | param {
96 | lr_mult: 1
97 | decay_mult: 1
98 | }
99 | param {
100 | lr_mult: 2
101 | decay_mult: 0
102 | }
103 | inner_product_param {
104 | num_output: 81
105 | weight_filler {
106 | type: "msra"
107 | std: 0.01
108 | }
109 | bias_filler {
110 | type: "constant"
111 | value: 0
112 | }
113 | }
114 | }
115 | layer {
116 | name: "bbox_pred"
117 | type: "InnerProduct"
118 | bottom: "fc2"
119 | top: "bbox_pred"
120 | param {
121 | lr_mult: 1
122 | decay_mult: 1
123 | }
124 | param {
125 | lr_mult: 2
126 | decay_mult: 0
127 | }
128 | inner_product_param {
129 | num_output: 324
130 | weight_filler {
131 | type: "msra"
132 | std: 0.01
133 | }
134 | bias_filler {
135 | type: "constant"
136 | value: 0
137 | }
138 | }
139 | }
140 | layer {
141 | name: "cls_prob"
142 | type: "Softmax"
143 | bottom: "cls_score"
144 | top: "cls_prob"
145 | }
146 |
147 |
--------------------------------------------------------------------------------
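The 324-channel `bbox_pred` above packs 81 classes x 4 box deltas per RoI (84 = 21 x 4 in the VOC deploys). A sketch of the standard bbox_transform_inv decoding for a single RoI and class, under py-faster-rcnn conventions:

```
import numpy as np

def decode(roi, delta):
    w, h = roi[2] - roi[0] + 1.0, roi[3] - roi[1] + 1.0   # legacy +1 convention
    cx, cy = roi[0] + 0.5 * w, roi[1] + 0.5 * h
    dx, dy, dw, dh = delta
    pcx, pcy = dx * w + cx, dy * h + cy                   # shift the center
    pw, ph = np.exp(dw) * w, np.exp(dh) * h               # rescale width/height
    return np.array([pcx - 0.5 * pw, pcy - 0.5 * ph,
                     pcx + 0.5 * pw, pcy + 0.5 * ph])

print(decode(np.array([10., 10., 50., 30.]), np.zeros(4)))  # ~the original RoI
```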
/det/faster_rcnn/models/pascal_voc/airx101-32x4d/rpn_rcnn_deploys/rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt:
--------------------------------------------------------------------------------
1 | input: "conv_new_1"
2 | input_shape {
3 | dim: 1
4 | dim: 256
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | bottom: "conv_new_1"
18 | bottom: "rois"
19 | top: "roi_pool"
20 | name: "roi_pool"
21 | type: "ROIPooling"
22 | roi_pooling_param {
23 | pooled_w: 7
24 | pooled_h: 7
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | name: "fc1"
30 | type: "InnerProduct"
31 | bottom: "roi_pool"
32 | top: "fc1"
33 | param {
34 | lr_mult: 1
35 | decay_mult: 1
36 | }
37 | param {
38 | lr_mult: 2
39 | decay_mult: 0
40 | }
41 | inner_product_param {
42 | num_output: 1024
43 | weight_filler {
44 | type: "gaussian"
45 | std: 0.01
46 | }
47 | bias_filler {
48 | type: "constant"
49 | value: 0
50 | }
51 | }
52 | }
53 | layer {
54 | name: "fc1_relu"
55 | type: "ReLU"
56 | bottom: "fc1"
57 | top: "fc1"
58 | }
59 | layer {
60 | name: "fc2"
61 | type: "InnerProduct"
62 | bottom: "fc1"
63 | top: "fc2"
64 | param {
65 | lr_mult: 1
66 | decay_mult: 1
67 | }
68 | param {
69 | lr_mult: 2
70 | decay_mult: 0
71 | }
72 | inner_product_param {
73 | num_output: 1024
74 | weight_filler {
75 | type: "gaussian"
76 | std: 0.01
77 | }
78 | bias_filler {
79 | type: "constant"
80 | value: 0
81 | }
82 | }
83 | }
84 | layer {
85 | name: "fc2_relu"
86 | type: "ReLU"
87 | bottom: "fc2"
88 | top: "fc2"
89 | }
90 | layer {
91 | name: "cls_score"
92 | type: "InnerProduct"
93 | bottom: "fc2"
94 | top: "cls_score"
95 | param {
96 | lr_mult: 1
97 | decay_mult: 1
98 | }
99 | param {
100 | lr_mult: 2
101 | decay_mult: 0
102 | }
103 | inner_product_param {
104 | num_output: 21
105 | weight_filler {
106 | type: "msra"
107 | std: 0.01
108 | }
109 | bias_filler {
110 | type: "constant"
111 | value: 0
112 | }
113 | }
114 | }
115 | layer {
116 | name: "bbox_pred"
117 | type: "InnerProduct"
118 | bottom: "fc2"
119 | top: "bbox_pred"
120 | param {
121 | lr_mult: 1
122 | decay_mult: 1
123 | }
124 | param {
125 | lr_mult: 2
126 | decay_mult: 0
127 | }
128 | inner_product_param {
129 | num_output: 84
130 | weight_filler {
131 | type: "msra"
132 | std: 0.01
133 | }
134 | bias_filler {
135 | type: "constant"
136 | value: 0
137 | }
138 | }
139 | }
140 | layer {
141 | name: "cls_prob"
142 | type: "Softmax"
143 | bottom: "cls_score"
144 | top: "cls_prob"
145 | }
146 |
147 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt:
--------------------------------------------------------------------------------
1 | input: "p2_elewise"
2 | input_shape {
3 | dim: 1
4 | dim: 256
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "p3_elewise"
10 | input_shape {
11 | dim: 1
12 | dim: 256
13 | dim: 40
14 | dim: 40
15 | }
16 |
17 | input: "p4_elewise"
18 | input_shape {
19 | dim: 1
20 | dim: 256
21 | dim: 40
22 | dim: 40
23 | }
24 |
25 | input: "p5_conv_1x1"
26 | input_shape {
27 | dim: 1
28 | dim: 256
29 | dim: 40
30 | dim: 40
31 | }
32 |
33 | input: "rois"
34 | input_shape {
35 | dim: 300
36 | dim: 5
37 | }
38 |
39 | #============== ROI Pooling ===============
40 | layer {
41 | name: "roi_pool4"
42 | type: "ROIPooling"
43 | bottom: "p2_elewise"
44 | bottom: "rois"
45 | top: "roi_pool4"
46 | roi_pooling_param {
47 | pooled_w: 8
48 | pooled_h: 8
49 | spatial_scale: 0.25
50 | }
51 | }
52 | layer {
53 | name: "roi_pool8"
54 | type: "ROIPooling"
55 | bottom: "p3_elewise"
56 | bottom: "rois"
57 | top: "roi_pool8"
58 | roi_pooling_param {
59 | pooled_w: 8
60 | pooled_h: 8
61 | spatial_scale: 0.125
62 | }
63 | }
64 | layer {
65 | name: "roi_pool16"
66 | type: "ROIPooling"
67 | bottom: "p4_elewise"
68 | bottom: "rois"
69 | top: "roi_pool16"
70 | roi_pooling_param {
71 | pooled_w: 8
72 | pooled_h: 8
73 | spatial_scale: 0.0625
74 | }
75 | }
76 | layer {
77 | name: "roi_pool32"
78 | type: "ROIPooling"
79 | bottom: "p5_conv_1x1"
80 | bottom: "rois"
81 | top: "roi_pool32"
82 | roi_pooling_param {
83 | pooled_w: 8
84 | pooled_h: 8
85 | spatial_scale: 0.03125
86 | }
87 | }
88 | layer {
89 | name: "roi_pool"
90 | type: "Eltwise"
91 | bottom: "roi_pool4"
92 | bottom: "roi_pool8"
93 | bottom: "roi_pool16"
94 | bottom: "roi_pool32"
95 | top: "roi_pool"
96 | eltwise_param {
97 | operation: SUM
98 | }
99 | }
100 | layer {
101 | name: "fc1"
102 | type: "InnerProduct"
103 | bottom: "roi_pool"
104 | top: "fc1"
105 | param {
106 | lr_mult: 1
107 | decay_mult: 1
108 | }
109 | param {
110 | lr_mult: 2
111 | decay_mult: 0
112 | }
113 | inner_product_param {
114 | num_output: 1024
115 | weight_filler {
116 | type: "gaussian"
117 | std: 0.01
118 | }
119 | bias_filler {
120 | type: "constant"
121 | value: 0
122 | }
123 | }
124 | }
125 | layer {
126 | name: "fc1_relu"
127 | type: "ReLU"
128 | bottom: "fc1"
129 | top: "fc1"
130 | }
131 | layer {
132 | name: "fc2"
133 | type: "InnerProduct"
134 | bottom: "fc1"
135 | top: "fc2"
136 | param {
137 | lr_mult: 1
138 | decay_mult: 1
139 | }
140 | param {
141 | lr_mult: 2
142 | decay_mult: 0
143 | }
144 | inner_product_param {
145 | num_output: 1024
146 | weight_filler {
147 | type: "gaussian"
148 | std: 0.01
149 | }
150 | bias_filler {
151 | type: "constant"
152 | value: 0
153 | }
154 | }
155 | }
156 | layer {
157 | name: "fc2_relu"
158 | type: "ReLU"
159 | bottom: "fc2"
160 | top: "fc2"
161 | }
162 | layer {
163 | name: "cls_score"
164 | type: "InnerProduct"
165 | bottom: "fc2"
166 | top: "cls_score"
167 | param {
168 | lr_mult: 1
169 | decay_mult: 1
170 | }
171 | param {
172 | lr_mult: 2
173 | decay_mult: 0
174 | }
175 | inner_product_param {
176 | num_output: 81
177 | weight_filler {
178 | type: "msra"
179 | std: 0.01
180 | }
181 | bias_filler {
182 | type: "constant"
183 | value: 0
184 | }
185 | }
186 | }
187 | layer {
188 | name: "bbox_pred"
189 | type: "InnerProduct"
190 | bottom: "fc2"
191 | top: "bbox_pred"
192 | param {
193 | lr_mult: 1
194 | decay_mult: 1
195 | }
196 | param {
197 | lr_mult: 2
198 | decay_mult: 0
199 | }
200 | inner_product_param {
201 | num_output: 324
202 | weight_filler {
203 | type: "msra"
204 | std: 0.01
205 | }
206 | bias_filler {
207 | type: "constant"
208 | value: 0
209 | }
210 | }
211 | }
212 | layer {
213 | name: "cls_prob"
214 | type: "Softmax"
215 | bottom: "cls_score"
216 | top: "cls_prob"
217 | }
218 |
219 |
220 |
--------------------------------------------------------------------------------
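Unlike an FPN head that assigns each RoI to one pyramid level, the deploy above pools every RoI from all four levels at 8x8 and merges them with an elementwise SUM; each `spatial_scale` is simply 1/stride of its level:

```
strides = {'p2_elewise': 4, 'p3_elewise': 8, 'p4_elewise': 16, 'p5_conv_1x1': 32}
for blob, s in sorted(strides.items()):
    print(blob, 1.0 / s)   # 0.25, 0.125, 0.0625, 0.03125, matching the layers above
```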
/det/rfcn/tools/train_net_multi_gpu.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # --------------------------------------------------------
4 | # Written by soeaver
5 | # Modified version of py-R-FCN-multiGPU
6 | # --------------------------------------------------------
7 |
8 | """Train a Fast R-CNN network on a region of interest database."""
9 |
10 | # import _init_paths
11 | import sys, os
12 | sys.path.append(os.path.expanduser('~/py-RFCN-priv/caffe-priv/python'))  # '~' is not expanded automatically in sys.path
13 | sys.path.append(os.path.expanduser('~/py-RFCN-priv/lib'))
14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu
15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
16 | from datasets.factory import get_imdb
17 | import datasets.imdb
18 | import caffe
19 | import argparse
20 | import pprint
21 | import numpy as np
22 |
23 |
24 | def parse_args():
25 | """
26 | Parse input arguments
27 | """
28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
29 | parser.add_argument("--gpu_id", type=str,
30 | default='0,1',
31 | help="List of device ids.")
32 | parser.add_argument('--solver', dest='solver',
33 | help='solver prototxt',
34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str)
35 | parser.add_argument('--iters', dest='max_iters',
36 | help='number of iterations to train',
37 | default=80000, type=int)
38 | parser.add_argument('--weights', dest='pretrained_model',
39 | help='initialize with pretrained model weights',
40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str)
41 | parser.add_argument('--cfg', dest='cfg_file',
42 | help='optional config file',
43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str)
44 | parser.add_argument('--imdb', dest='imdb_name',
45 | help='dataset to train on',
46 | default='voc_0712_trainval', type=str)
47 | parser.add_argument('--rand', dest='randomize',
48 | help='randomize (do not use a fixed seed)',
49 | action='store_true')
50 | parser.add_argument('--set', dest='set_cfgs',
51 | help='set config keys', default=None,
52 | nargs=argparse.REMAINDER)
53 |
54 | if len(sys.argv) == 1:
55 | parser.print_help()
56 | sys.exit(1)
57 |
58 | args = parser.parse_args()
59 | return args
60 |
61 | def combined_roidb(imdb_names):
62 | def get_roidb(imdb_name):
63 | imdb = get_imdb(imdb_name)
64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name)
65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
67 | roidb = get_training_roidb(imdb)
68 | return roidb
69 |
70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')]
71 | roidb = roidbs[0]
72 | if len(roidbs) > 1:
73 | for r in roidbs[1:]:
74 | roidb.extend(r)
75 | imdb = datasets.imdb.imdb(imdb_names)
76 | else:
77 | imdb = get_imdb(imdb_names)
78 | return imdb, roidb
79 |
80 | if __name__ == '__main__':
81 | args = parse_args()
82 |
83 | print('Called with args:')
84 | print(args)
85 |
86 | if args.cfg_file is not None:
87 | cfg_from_file(args.cfg_file)
88 | if args.set_cfgs is not None:
89 | cfg_from_list(args.set_cfgs)
90 |
91 | gpu_id = args.gpu_id
92 | gpu_list = gpu_id.split(',')
93 | gpus = [int(i) for i in gpu_list]
94 |
95 | print('Using config:')
96 | pprint.pprint(cfg)
97 |
98 | if not args.randomize:
99 | # fix the random seeds (numpy and caffe) for reproducibility
100 | np.random.seed(cfg.RNG_SEED)
101 | #caffe.set_random_seed(cfg.RNG_SEED)
102 |
103 | # set up caffe
104 |
105 | imdb, roidb = combined_roidb(args.imdb_name)
106 | print '{:d} roidb entries'.format(len(roidb))
107 |
108 | output_dir = get_output_dir(imdb)
109 | print 'Output will be saved to `{:s}`'.format(output_dir)
110 |
111 | train_net_multi_gpu(args.solver, roidb, output_dir,
112 | pretrained_model=args.pretrained_model,
113 | max_iter=args.max_iters, gpus=gpus)
114 |
--------------------------------------------------------------------------------
/det/faster_rcnn/tools/train_net_multi_gpu.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # --------------------------------------------------------
4 | # Written by soeaver
5 | # Modified version of py-R-FCN-multiGPU
6 | # --------------------------------------------------------
7 |
8 | """Train a Fast R-CNN network on a region of interest database."""
9 |
10 | # import _init_paths
11 | import sys, os
12 | sys.path.append(os.path.expanduser('~/py-RFCN-priv/caffe-priv/python'))  # '~' is not expanded automatically in sys.path
13 | sys.path.append(os.path.expanduser('~/py-RFCN-priv/lib'))
14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu
15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
16 | from datasets.factory import get_imdb
17 | import datasets.imdb
18 | import caffe
19 | import argparse
20 | import pprint
21 | import numpy as np
22 |
23 |
24 | def parse_args():
25 | """
26 | Parse input arguments
27 | """
28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
29 | parser.add_argument("--gpu_id", type=str,
30 | default='0,1',
31 | help="List of device ids.")
32 | parser.add_argument('--solver', dest='solver',
33 | help='solver prototxt',
34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str)
35 | parser.add_argument('--iters', dest='max_iters',
36 | help='number of iterations to train',
37 | default=80000, type=int)
38 | parser.add_argument('--weights', dest='pretrained_model',
39 | help='initialize with pretrained model weights',
40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str)
41 | parser.add_argument('--cfg', dest='cfg_file',
42 | help='optional config file',
43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str)
44 | parser.add_argument('--imdb', dest='imdb_name',
45 | help='dataset to train on',
46 | default='voc_0712_trainval', type=str)
47 | parser.add_argument('--rand', dest='randomize',
48 | help='randomize (do not use a fixed seed)',
49 | action='store_true')
50 | parser.add_argument('--set', dest='set_cfgs',
51 | help='set config keys', default=None,
52 | nargs=argparse.REMAINDER)
53 |
54 | if len(sys.argv) == 1:
55 | parser.print_help()
56 | sys.exit(1)
57 |
58 | args = parser.parse_args()
59 | return args
60 |
61 | def combined_roidb(imdb_names):
62 | def get_roidb(imdb_name):
63 | imdb = get_imdb(imdb_name)
64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name)
65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
67 | roidb = get_training_roidb(imdb)
68 | return roidb
69 |
70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')]
71 | roidb = roidbs[0]
72 | if len(roidbs) > 1:
73 | for r in roidbs[1:]:
74 | roidb.extend(r)
75 | imdb = datasets.imdb.imdb(imdb_names)
76 | else:
77 | imdb = get_imdb(imdb_names)
78 | return imdb, roidb
79 |
80 | if __name__ == '__main__':
81 | args = parse_args()
82 |
83 | print('Called with args:')
84 | print(args)
85 |
86 | if args.cfg_file is not None:
87 | cfg_from_file(args.cfg_file)
88 | if args.set_cfgs is not None:
89 | cfg_from_list(args.set_cfgs)
90 |
91 | gpu_id = args.gpu_id
92 | gpu_list = gpu_id.split(',')
93 | gpus = [int(i) for i in gpu_list]
94 |
95 | print('Using config:')
96 | pprint.pprint(cfg)
97 |
98 | if not args.randomize:
99 | # fix the random seeds (numpy and caffe) for reproducibility
100 | np.random.seed(cfg.RNG_SEED)
101 | #caffe.set_random_seed(cfg.RNG_SEED)
102 |
103 | # set up caffe
104 |
105 | imdb, roidb = combined_roidb(args.imdb_name)
106 | print '{:d} roidb entries'.format(len(roidb))
107 |
108 | output_dir = get_output_dir(imdb)
109 | print 'Output will be saved to `{:s}`'.format(output_dir)
110 |
111 | train_net_multi_gpu(args.solver, roidb, output_dir,
112 | pretrained_model=args.pretrained_model,
113 | max_iter=args.max_iters, gpus=gpus)
114 |
--------------------------------------------------------------------------------
/det/MSCOCO_Benchmark.md:
--------------------------------------------------------------------------------
1 | ## MSCOCO Detection Benchmark
2 |
3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)**
4 |
5 | ### **1. Results of training on MSCOCO2017-trainval and testing on test-dev2017.**
6 |
7 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L
8 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
9 | **RFCN-se-inception-v2** with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4
10 | **RFCN-se-inception-v2** with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 36.8 | 59.8 | 38.7 | 19.7 | 39.8 | 49.1
11 | **RFCN-se-resnet50** with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1
12 | **FPN-Faster-inception-v4** with ms-train | 36.5 | 58.5 | 38.8 | 16.5 | 38.8 | 52.1
13 | **FPN-Faster-inception-v4** with ms-train & bbox-voting & soft-nms | 38.3 | 61.0 | 40.8 | 20.0 | 41.5 | 51.4
14 | **FPN-Faster-inception-v4** with ms-train & bbox-voting & soft-nms & flipping & ms-test | 39.5 | 62.5 | 42.3 | 23.3 | 43.2 | 51.0
15 | **RFCN-air101** with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0
16 | **RFCN-air101** with extra-7-epochs & ms-train & ohem & multigrid | 38.5 | 60.2 | 41.4 | 18.3 | 42.1 | 53.4
17 | **RFCN-air101** with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping | 40.4 | 63.5 | 43.5 | 22.6 | 44.4 | 52.0
18 | **RFCN-air101** with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 41.8 | 65.3 | 45.3 | 26.1 | 45.6 | 52.4
19 | **RFCN-air101** with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & assign-ms-test | 42.1 | 64.6 | 45.6 | 25.6 | 44.5 | 54.1
20 | **RFCN-air101** with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 43.2 | 66.0 | 46.7 | 25.6 | 46.3 | 55.9
21 | **Faster-2fc-air101** with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5
22 |
23 | - Unless a row says otherwise (e.g. ms-test), the models are tested on a single scale (600×1000) without any bells and whistles;
24 |
25 |
26 | ### **2. Context Pyramid Attention Network (CPANet) results: training on MSCOCO2017-trainval and testing on test-dev2017.**
27 |
28 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L
29 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
30 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 600-scale-test | 40.1 | 62.2 | 43.4 | 19.4 | 44.4 | 55.9
31 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6
32 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6
33 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6
34 |
35 |
36 | ### **3. COCOPerson results training on MSCOCO2017-trainval and testing on test-dev2017.**
37 |
38 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L|mAR@10
39 | :---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:
40 | **RFCN-se-air14-thin-specific**<br/>with ms-train & ohem & multigrid | 21.5 | 48.9 | 16.5 | 12.3 | 27.3 | 30.8 | 28.6
41 | **RFCN-resnet18-specific**<br/>with ms-train & ohem & multigrid | 38.5 | 66.1 | 39.8 | 16.8 | 47.1 | 63.0 | 41.9
42 | **RFCN-se-resnet50-specific**<br/>with 800-scale-train & ohem & multigrid | 39.0 | 64.1 | 41.1 | 13.5 | 48.4 | 66.4 | 43.9
43 | **RFCN-se-resnet50-specific**<br/>with ms-train & ohem & multigrid | 41.9 | 67.7 | 44.3 | 18.6 | 51.0 | 67.9 | 46.0
44 | **RFCN-se-resnet50-specific**<br/>with ms-train & ohem & multigrid & snms & flip & ms-test | 44.6 | 72.8 | 47.3 | 25.3 | 54.4 | 63.3 | 49.8
45 | **RFCN-se-resnet50**<br/>with ms-train & ohem & multigrid | 42.7 | 72.0 | 44.5 | 21.0 | 51.1 | 66.4 | 45.4
46 | **RFCN-se-inception-v2-specific**<br/>with ms-train & ohem & multigrid | 41.2 | 66.7 | 43.2 | 17.6 | 50.0 | 68.3 | 45.1
47 | **RFCN-se-inception-v2**<br/>with ms-train & ohem & multigrid | 42.3 | 71.4 | 44.2 | 19.5 | 50.7 | 67.2 | 44.9
48 | **RFCN-se-inception-v2**<br/>with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 48.0 | 79.5 | 50.0 | 28.3 | 55.8 | 67.5 | 50.8
49 | **RFCN-air101**<br/>with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 54.0 | 83.9 | 58.2 | 35.2 | 61.6 | 73.0 | 55.1
50 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 600-scale-test | 47.7 | 76.4 | 51.1 | 25.3 | 56.8 | 70.6 | 50.2
51 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 53.4 | 82.7 | 58.0 | 33.1 | 61.8 | 73.3 | 55.0
52 |
53 |
--------------------------------------------------------------------------------
/cls/cls_lite/README.md:
--------------------------------------------------------------------------------
1 | ## CLS Lite (Classification lite)
2 |
3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning.
4 |
5 |
6 | ### Performance of lite models on imagenet validation.
7 | **1. Top-1/5 error and CPU/GPU forward/backward (F/B) speed of the lite models in this repository.**
8 |
9 | Network|Top-1/5 error|F/B on GPU|F/B on CPU|Source
10 | :---:|:---:|:---:|:---:|:---:
11 | resnet10-1x32d | 44.78/21.42 | 2.19/2.57ms | 42.84/38.00ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
12 | resnet10-1x48d | -- | 2.55/3.01ms | 83.66/75.97ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
13 | resnet10-1x64d | 35.93/14.59 | 2.93/3.86ms | 134.3/124.8ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
14 | resnet10-1x96d | 30.66/11.13 | 3.42/5.57ms | 220.7/204.9ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
15 | resnet18-1x16d | 51.37/26.35 | 3.03/3.22ms | 25.03/22.63ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
16 | resnet18-1x32d | 38.24/16.02 | 3.53/4.14ms | 69.2/63.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
17 | resnet18-1x48d | 32.55/11.87 | 4.30/4.83ms | 139.1/127.6ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
18 | resnet18-1x64d<br/>(resnet18-priv) | 29.62/10.38 | 4.48/5.07ms | 213.2/193.3ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
19 | resnet18-1x96d | 26.11/8.31 | 6.16/9.94ms | 443.2/419.0ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
20 | resnet18-1x128d | 24.81/7.61 | 9.75/16.94ms | 729.1/695.4ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
21 | resnext26-32x4d | 25.57/8.12 | 9.68/11.16ms | 331.4/300.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
22 | vgg13-pytorch | 31.07/11.13 | 5.70/9.35ms | 1318/1279ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
23 | vgg13bn-pytorch | 29.50/10.18 | 8.35/13.49ms | 1443/1336ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
24 | vgg16-pytorch | 29.14/10.00 | 6.79/11.78ms | 1684/1643ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
25 | vgg16-tf | 29.03/10.12 | 13.04/48.90ms | 1787/1647ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim)
26 | vgg16-dsd | 27.62/9.02 | 6.81/11.80ms | 1753/1660ms | [dsd](https://github.com/songhan/DSD)
27 | vgg16-5x | 31.67/11.60 | 4.46/7.15ms | 580.5/593.0ms | [channel-pruning](https://github.com/yihui-he/channel-pruning)
28 | vgg16-3c4x | 28.79/9.78 | 7.53/9.77ms | 753.4/772.4ms | [channel-pruning](https://github.com/yihui-he/channel-pruning)
29 | vgg16bn-pytorch | 27.53/8.99 | 9.14/15.83ms | 1783/1695ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
30 | vgg19-pytorch | 28.23/9.60 | 8.03/14.26ms | 2076/2012ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
31 | vgg19bn-pytorch | 26.58/8.45 | 10.75/18.77ms | 2224/2081ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models)
32 | inception-v1-tf | 31.37/11.10 | 10.66/7.84ms | 186.2/155.8ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim)
33 | inception-v2-tf | 27.91/9.40 | 13.93/10.65ms | 286.4/255.0ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim)
34 | xception | 20.90/5.49 | 15.21/31.65ms | 1262/1253ms | [keras-models](https://github.com/fchollet/deep-learning-models)
35 | mobilenet-v1-1.0 | 29.98/10.52 | 6.16/9.50ms | 169.4/138.1ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim)
36 | air14-1x8d | 56.28/31.25 | 4.28/3.08ms | 21.01/3.29ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
37 | air14-1x16d | 44.23/20.68 | 5.13/3.56ms | 45.45/6.41ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
38 | air26-1x16d | 36.31/14.59 | 7.32/4.70ms | 62.02/8.52ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
39 | air26-1x32d | 28.71/9.59 | 8.77/5.05ms | 170.7/19.25ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
40 | air50-1x16d | 31.19/11.26 | 14.73/8.31ms | 91.65/16.06ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
41 | air50-1x32d | 25.59/7.89 | 15.39/7.64ms | 229.6/22.81ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification)
42 | dpn68 | 22.56/6.24 | 22.70/21.41ms | 371.1/329.3ms | [DPNs](https://github.com/cypw/DPNs)
43 | se-resnet50 | 22.39/6.37 | 17.91/19.49ms | 932.2/821.4ms | [senet](https://github.com/hujie-frank/SENet)
44 | se-resnet50-hik | 21.98/5.80 | 17.43/20.13ms | 581.1/482.7ms | [senet-caffe](https://github.com/shicai/SENet-Caffe)
45 | se-inception-v2 | 23.64/7.04 | 15.31/11.21ms | 251.9/218.5ms | [senet](https://github.com/hujie-frank/SENet)
46 |
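F/B above is the forward/backward time for a single-image inference. As a rough cross-check of the GPU forward numbers, a minimal pycaffe timing sketch (the deploy filename is a placeholder; the official `caffe time` tool additionally times the backward pass):

```Python
import time

import caffe

caffe.set_mode_gpu()
caffe.set_device(0)
# placeholder deploy name; substitute any lite deploy from this repository
net = caffe.Net('deploy_resnet18-priv.prototxt', caffe.TEST)

net.forward()  # warm-up run so lazy allocations don't skew the measurement
runs = 50
start = time.time()
for _ in range(runs):
    net.forward()
print('forward: {:.2f} ms/run'.format((time.time() - start) / runs * 1000))
```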
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/2007test400.txt:
--------------------------------------------------------------------------------
1 | 000001
2 | 000002
3 | 000003
4 | 000004
5 | 000006
6 | 000008
7 | 000010
8 | 000011
9 | 000013
10 | 000014
11 | 000015
12 | 000018
13 | 000022
14 | 000025
15 | 000027
16 | 000028
17 | 000029
18 | 000031
19 | 000037
20 | 000038
21 | 000040
22 | 000043
23 | 000045
24 | 000049
25 | 000053
26 | 000054
27 | 000055
28 | 000056
29 | 000057
30 | 000058
31 | 000059
32 | 000062
33 | 000067
34 | 000068
35 | 000069
36 | 000070
37 | 000071
38 | 000074
39 | 000075
40 | 000076
41 | 000079
42 | 000080
43 | 000082
44 | 000084
45 | 000085
46 | 000086
47 | 000087
48 | 000088
49 | 000090
50 | 000092
51 | 000094
52 | 000096
53 | 000097
54 | 000098
55 | 000100
56 | 000103
57 | 000105
58 | 000106
59 | 000108
60 | 000111
61 | 000114
62 | 000115
63 | 000116
64 | 000119
65 | 000124
66 | 000126
67 | 000127
68 | 000128
69 | 000135
70 | 000136
71 | 000137
72 | 000139
73 | 000144
74 | 000145
75 | 000148
76 | 000149
77 | 000151
78 | 000152
79 | 000155
80 | 000157
81 | 000160
82 | 000166
83 | 000167
84 | 000168
85 | 000172
86 | 000175
87 | 000176
88 | 000178
89 | 000179
90 | 000181
91 | 000182
92 | 000183
93 | 000185
94 | 000186
95 | 000188
96 | 000191
97 | 000195
98 | 000196
99 | 000197
100 | 000199
101 | 000201
102 | 000202
103 | 000204
104 | 000205
105 | 000206
106 | 000212
107 | 000213
108 | 000216
109 | 000217
110 | 000223
111 | 000226
112 | 000227
113 | 000230
114 | 000231
115 | 000234
116 | 000237
117 | 000238
118 | 000239
119 | 000240
120 | 000243
121 | 000247
122 | 000248
123 | 000252
124 | 000253
125 | 000254
126 | 000255
127 | 000258
128 | 000260
129 | 000261
130 | 000264
131 | 000265
132 | 000267
133 | 000271
134 | 000272
135 | 000273
136 | 000274
137 | 000277
138 | 000279
139 | 000280
140 | 000281
141 | 000283
142 | 000284
143 | 000286
144 | 000287
145 | 000290
146 | 000291
147 | 000292
148 | 000293
149 | 000295
150 | 000297
151 | 000299
152 | 000300
153 | 000301
154 | 000309
155 | 000310
156 | 000313
157 | 000314
158 | 000315
159 | 000316
160 | 000319
161 | 000324
162 | 000326
163 | 000327
164 | 000330
165 | 000333
166 | 000335
167 | 000339
168 | 000341
169 | 000342
170 | 000345
171 | 000346
172 | 000348
173 | 000350
174 | 000351
175 | 000353
176 | 000356
177 | 000357
178 | 000358
179 | 000360
180 | 000361
181 | 000362
182 | 000364
183 | 000365
184 | 000366
185 | 000368
186 | 000369
187 | 000371
188 | 000375
189 | 000376
190 | 000377
191 | 000378
192 | 000383
193 | 000384
194 | 000385
195 | 000386
196 | 000388
197 | 000389
198 | 000390
199 | 000392
200 | 000393
201 | 000397
202 | 000398
203 | 000399
204 | 000401
205 | 000402
206 | 000405
207 | 000409
208 | 000410
209 | 000412
210 | 000413
211 | 000414
212 | 000415
213 | 000418
214 | 000421
215 | 000422
216 | 000423
217 | 000425
218 | 000426
219 | 000429
220 | 000432
221 | 000434
222 | 000436
223 | 000437
224 | 000440
225 | 000441
226 | 000442
227 | 000444
228 | 000445
229 | 000447
230 | 000449
231 | 000451
232 | 000452
233 | 000453
234 | 000455
235 | 000456
236 | 000457
237 | 000458
238 | 000465
239 | 000466
240 | 000467
241 | 000471
242 | 000472
243 | 000473
244 | 000475
245 | 000478
246 | 000479
247 | 000481
248 | 000485
249 | 000487
250 | 000488
251 | 000490
252 | 000493
253 | 000495
254 | 000497
255 | 000502
256 | 000504
257 | 000505
258 | 000506
259 | 000507
260 | 000510
261 | 000511
262 | 000512
263 | 000517
264 | 000521
265 | 000527
266 | 000529
267 | 000532
268 | 000533
269 | 000534
270 | 000536
271 | 000538
272 | 000539
273 | 000542
274 | 000546
275 | 000547
276 | 000548
277 | 000551
278 | 000553
279 | 000556
280 | 000557
281 | 000558
282 | 000560
283 | 000561
284 | 000562
285 | 000566
286 | 000567
287 | 000568
288 | 000569
289 | 000570
290 | 000571
291 | 000572
292 | 000573
293 | 000574
294 | 000575
295 | 000576
296 | 000578
297 | 000580
298 | 000584
299 | 000585
300 | 000586
301 | 000587
302 | 000593
303 | 000594
304 | 000595
305 | 000596
306 | 000600
307 | 000602
308 | 000603
309 | 000604
310 | 000606
311 | 000607
312 | 000611
313 | 000614
314 | 000615
315 | 000616
316 | 000617
317 | 000618
318 | 000621
319 | 000623
320 | 000624
321 | 000627
322 | 000629
323 | 000630
324 | 000631
325 | 000634
326 | 000636
327 | 000638
328 | 000639
329 | 000640
330 | 000641
331 | 000642
332 | 000643
333 | 000644
334 | 000646
335 | 000649
336 | 000650
337 | 000651
338 | 000652
339 | 000655
340 | 000658
341 | 000659
342 | 000662
343 | 000664
344 | 000665
345 | 000666
346 | 000668
347 | 000669
348 | 000670
349 | 000673
350 | 000674
351 | 000678
352 | 000679
353 | 000681
354 | 000683
355 | 000687
356 | 000691
357 | 000692
358 | 000693
359 | 000696
360 | 000697
361 | 000698
362 | 000701
363 | 000703
364 | 000704
365 | 000706
366 | 000708
367 | 000715
368 | 000716
369 | 000718
370 | 000719
371 | 000721
372 | 000722
373 | 000723
374 | 000724
375 | 000725
376 | 000727
377 | 000732
378 | 000734
379 | 000735
380 | 000736
381 | 000737
382 | 000741
383 | 000743
384 | 000744
385 | 000745
386 | 000747
387 | 000749
388 | 000751
389 | 000757
390 | 000758
391 | 000759
392 | 000762
393 | 000765
394 | 000766
395 | 000769
396 | 000773
397 | 000775
398 | 000778
399 | 000779
400 | 000781
--------------------------------------------------------------------------------
/det/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Object Detection
3 |
4 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv).
5 | We are releasing the training code and files; the models and more experiments will come soon.**
6 |
7 | ### Object Detection Performance on PASCAL VOC. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/VOC_Benchmark.md))
8 |
9 | #### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
10 |
11 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
12 | :---:|:---:|:---:|:---:|:---:|:---:
13 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB
14 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB
15 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB
16 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB
17 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB
18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB
19 | inception-resnet-v2| 80.0 | 2.0 img/s<br/>(batch=112) | 11,497MB | 3.2 img/s | 8,409MB
20 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB
21 |
22 | - To reduce memory usage, we merge the batchnorm layer parameters of all models into the scale layers; for more details, please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py) (a folding sketch follows this list);
23 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file so that more testing tricks can be adopted;
24 | - Performance, speed and memory are measured with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) on an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions;
25 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3;
26 |
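A minimal sketch of the batchnorm-into-scale folding mentioned in the first note, assuming pycaffe, placeholder file names, and a single BatchNorm/Scale pair (the linked gen_merged_model.py is the authoritative script and walks every such pair):

```Python
import numpy as np

import caffe

eps = 1e-5
# placeholder names: the unmerged deploy and its trained weights
net = caffe.Net('deploy.prototxt', 'net.caffemodel', caffe.TEST)

for bn_name, scale_name in [('bn_conv1', 'scale_conv1')]:  # placeholder pair
    mean, var, factor = (b.data for b in net.params[bn_name])
    factor = 0.0 if factor[0] == 0 else 1.0 / factor[0]    # caffe's moving-average scale blob
    std = np.sqrt(var * factor + eps)
    gamma = net.params[scale_name][0].data.copy()
    beta = net.params[scale_name][1].data.copy()
    net.params[scale_name][0].data[...] = gamma / std                         # folded multiplier
    net.params[scale_name][1].data[...] = beta - gamma * mean * factor / std  # folded bias

net.save('net-merge.caffemodel')
```

Since caffe copies weights by layer name, loading the saved model with a deploy whose BatchNorm layers are removed (as in the merged deploys of this repository) simply ignores the leftover BatchNorm blobs.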
27 |
28 | #### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
29 |
30 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
31 | :---:|:---:|:---:|:---:|:---:|:---:
32 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB
33 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB
34 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB
35 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB
36 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB
37 |
38 | - 2fc means: conv256d --- fc1024d --- fc1024d (a 256-d conv layer followed by two 1024-d fully-connected layers);
39 | - The mAP@50 score in parentheses is obtained by training with ohem and [multigrid](https://arxiv.org/abs/1706.05587);
40 |
41 |
42 | #### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
43 |
44 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
45 | :---:|:---:|:---:|:---:|:---:|:---:
46 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB
47 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB
48 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB
49 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB
50 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB
51 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB
52 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB
53 |
54 | - The mAP@50 score in parentheses is obtained by training with ohem and [multigrid](https://arxiv.org/abs/1706.05587);
55 |
56 |
57 | ### Object Detection Performance on MSCOCO. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/MSCOCO_Benchmark.md))
58 |
59 | #### **1. Results training on MSCOCO2017-trainval and testing on test-dev2017.**
60 |
61 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L
62 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
63 | **RFCN-se-inception-v2**<br/>with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4
64 | **RFCN-se-resnet50**<br/>with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1
65 | **RFCN-air101**<br/>with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0
66 | **Faster-2fc-air101**<br/>with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5
67 |
68 | - All the models are tested on a single scale (600*1000) without any bells and whistles;
69 |
70 |
71 | #### **2. Context Pyramid Attention Network (CPANet) results training on MSCOCO2017-trainval and testing on test-dev2017.**
72 |
73 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L
74 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
75 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6
76 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6
77 | **CPANet-air101**<br/>with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6
78 |
--------------------------------------------------------------------------------
/det/VOC_Benchmark.md:
--------------------------------------------------------------------------------
1 | ## VOC Detection Benchmark
2 |
3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)**
4 |
5 | ### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
6 |
7 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
8 | :---:|:---:|:---:|:---:|:---:|:---:
9 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB
10 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB
11 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB
12 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB
13 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB
14 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB
15 | resnext101-64x4d| 80.71 | 2.0 img/s<br/>(batch=96) | 11,277MB | 3.7 img/s | 9,461MB
16 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB
17 | xception| 76.6 | 3.3 img/s | 7,341MB | 7.8 img/s | 2,979MB
18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB
19 | inception-resnet-v2| 80.0 | 2.0 img/s<br/>(batch=112) | 11,497MB | 3.2 img/s | 8,409MB
20 | densenet-201| 77.53 | 3.9 img/s<br/>(batch=72) | 10,073MB | 5.5 img/s | 9,955MB
21 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB
22 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB
23 |
24 | - To reduce memory usage, we merge the batchnorm layer parameters of all models into the scale layers; for more details, please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py);
25 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file so that more testing tricks can be adopted;
26 | - Performance, speed and memory are measured with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) on an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions;
27 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3;
28 |
29 |
30 | **Comparisons on VOC 2007 test using Faster-RCNN with inception-v4.**
31 |
32 | Method|mAP@50|improvement|test speed
33 | :---|:---:|:---:|:---:
34 | baseline inception-v4 | 81.49 | -- | 5.4 img/s
35 | +multi-scale training | 83.79 | 2.30 | 5.4 img/s
36 | +box voting | 83.95 | 0.16 | 5.4 img/s
37 | +nms=0.4 | 84.22 | 0.27 | 5.4 img/s
38 | +image flipping test | 84.54 | 0.32 | 2.7 img/s
39 | +multi-scale testing | 85.78 | 1.24 | 0.13 img/s
40 |
41 | - The SCALES for multi-scale training are (200, 400, 600, 800, 1000) and MAX_SIZE is 1666;
42 | - For multi-scale training, we double the training iterations (160,000 for VOC0712 trainval);
43 | - The SCALES for multi-scale testing are (400, 600, 800, 1000, 1200) and MAX_SIZE is 2000 (see the config sketch after this list);
44 |
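A minimal sketch of these multi-scale settings through the py-faster-rcnn-style config API used by the training tools in this repository (the key names follow fast_rcnn.config and should be verified against your checkout):

```Python
from fast_rcnn.config import cfg, cfg_from_list

# the multi-scale training/testing scales and max sizes quoted above
cfg_from_list(['TRAIN.SCALES', '(200, 400, 600, 800, 1000)',
               'TRAIN.MAX_SIZE', '1666',
               'TEST.SCALES', '(400, 600, 800, 1000, 1200)',
               'TEST.MAX_SIZE', '2000'])
```
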
45 | ### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
46 |
47 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
48 | :---:|:---:|:---:|:---:|:---:|:---:
49 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB
50 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB
51 | resnet101-v2 w/o OHEM | 80.18 | 5.4 img/s | 5,807MB | 10.5 img/s | 3,147MB
52 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB
53 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB
54 | air101-context | (82.09) | 3.3 img/s | 6,773MB | 8.6 img/s | 4,577MB
55 | air101-fpn w/o OHEM | 81.44 | 2.4 img/s | 7,063MB | 3.8 img/s | 4,433MB
56 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB
57 |
58 | - 2fc means: conv256d --- fc1024d --- fc1024d (a 256-d conv layer followed by two 1024-d fully-connected layers);
59 | - The mAP@50 score in parentheses is obtained by training with ohem and [multigrid](https://arxiv.org/abs/1706.05587);
60 |
61 |
62 | ### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.**
63 |
64 | Network|mAP@50(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
65 | :---:|:---:|:---:|:---:|:---:|:---:
66 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB
67 | resnext26-32x4d| 72.07 | 7.5 img/s | 2,521MB | 15.0 img/s | 1,797MB
68 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB
69 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB
70 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB
71 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB
72 | resnext101-64x4d| 80.26(79.88) | 2.4 img/s | 10,203MB | 6.2 img/s | 8,529MB
73 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB
74 | air152| (81.18) | 2.6 img/s | 9,331MB | 6.7 img/s | 6,151MB
75 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB
76 | inception-v4-3x3 | 81.15 | 3.7 img/s | 5,207MB | 9.5 img/s | 3,151MB
77 |
78 | - The mAP@50 score in parentheses is obtained by training with ohem and [multigrid](https://arxiv.org/abs/1706.05587);
79 |
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Caffe-model
2 | Caffe models (including classification, detection and segmentation) and deploy prototxts for resnet, resnext, inception_v3, inception_v4, inception_resnet, wider_resnet, densenet, aligned-inception-resne(x)t, DPNs and other networks.
3 |
4 | Clone the caffe-model repository
5 | ```Shell
6 | git clone https://github.com/soeaver/caffe-model --recursive
7 | ```
8 |
9 | ## We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)
10 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning.
11 |
12 | ## Disclaimer
13 |
14 | Most of the pre-trained models are converted from other projects; the main contribution belongs to the original authors.
15 |
16 | Project links:
17 |
18 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [Senet](https://github.com/hujie-frank/SENet)
19 |
20 |
21 | ## CLS (Classification, more details are in [cls](https://github.com/soeaver/caffe-model/tree/master/cls))
22 | ### Performance on imagenet validation.
23 | **Top-1/5 error of pre-trained models in this repository (pre-trained model download [urls](https://github.com/soeaver/caffe-model/tree/master/cls#performance-on-imagenet-validation)).**
24 |
25 | Network|224/299<br/>(single-crop)|224/299<br/>(12-crop)|320/395<br/>(single-crop)|320/395<br/>(12-crop)
26 | :---:|:---:|:---:|:---:|:---:
27 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57
28 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32
29 | resnet269-v2| 19.71/5.00 | **18.25**/4.20 | 18.70/4.33 | **17.87**/3.85
30 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24
31 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39
32 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 |18.12/3.92
33 | inception-resnet-v2| 19.86/**4.83** | 18.46/**4.08** | 18.75/**4.02** | 18.15/**3.71**
34 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03
35 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59
36 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31
37 | wrn50-2<br/>(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87
38 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43
39 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24
40 | dpn-107| 19.70/5.06 | ../.. | 18.41/4.25 | ../..
41 |
42 |
43 | ## DET (Detection, more details are in [det](https://github.com/soeaver/caffe-model/tree/master/det))
44 | ### Object Detection Performance on PASCAL VOC.
45 | **Original Faster-RCNN trained on VOC 2007+2012 trainval and tested on VOC 2007 test.**
46 |
47 | Network|mAP@50|train speed|train memory|test speed|test memory
48 | :---:|:---:|:---:|:---:|:---:|:---:
49 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB
50 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB
51 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB
52 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB
53 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB
54 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB
55 | resnext101-64x4d| 80.71 | 2.0 img/s<br/>(batch=96) | 11,277MB | 3.7 img/s | 9,461MB
56 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB
57 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB
58 | inception-resnet-v2| 80.0 | 2.0 img/s<br/>(batch=112) | 11,497MB | 3.2 img/s | 8,409MB
59 | densenet-201| 77.53 | 3.9 img/s<br/>(batch=72) | 10,073MB | 5.5 img/s | 9,955MB
60 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB
61 |
62 |
63 | ## SEG (Segmentation, more details are in [seg](https://github.com/soeaver/caffe-model/tree/master/seg))
64 | ### Object Segmentation Performance on PASCAL VOC.
65 | **PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).**
66 |
67 | Network|mIoU(%)|pixel acc(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory
68 | :---:|:---:|:---:|:---:|:---:|:---:|:---:
69 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB
70 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB
71 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB
72 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB
73 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB
74 |
75 |
76 | ## License
77 |
78 | caffe-model is released under the MIT License (refer to the LICENSE file for details).
79 |
80 |
81 | ## Acknowledgement
82 |
83 | I greatly thank [Yangqing Jia](https://github.com/Yangqing) and [BVLC group](https://www.github.com/BVLC/caffe) for developing Caffe.
84 |
85 | And I would like to thank all the authors of every network.
86 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "res4b"
2 | input_shape {
3 | dim: 1
4 | dim: 256
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
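# RCNN head of the split deploy: it consumes the shared "res4b" feature map
# and 300 proposal ROIs, ROI-pools them at 1/16 resolution (spatial_scale
# 0.0625), runs the res5 stage plus global average pooling, and predicts
# 21 class scores (20 VOC classes + background) and 84 box-regression
# values (4 per class).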
16 | layer {
17 | name: "roi_pool"
18 | type: "ROIPooling"
19 | bottom: "res4b"
20 | bottom: "rois"
21 | top: "roi_pool"
22 | roi_pooling_param {
23 | pooled_w: 14
24 | pooled_h: 14
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | bottom: "roi_pool"
30 | top: "res5a_branch1"
31 | name: "res5a_branch1"
32 | type: "Convolution"
33 | param {
34 | lr_mult: 1
35 | decay_mult: 1
36 | }
37 | convolution_param {
38 | num_output: 512
39 | kernel_size: 1
40 | pad: 0
41 | stride: 2
42 | bias_term: false
43 | }
44 | }
45 | layer {
46 | bottom: "res5a_branch1"
47 | top: "res5a_branch1"
48 | name: "scale5a_branch1"
49 | type: "Scale"
50 | scale_param {
51 | bias_term: true
52 | }
53 | param {
54 | lr_mult: 0.0
55 | decay_mult: 0.0
56 | }
57 | param {
58 | lr_mult: 0.0
59 | decay_mult: 0.0
60 | }
61 | }
62 | layer {
63 | bottom: "roi_pool"
64 | top: "res5a_branch2a"
65 | name: "res5a_branch2a"
66 | type: "Convolution"
67 | param {
68 | lr_mult: 1
69 | decay_mult: 1
70 | }
71 | convolution_param {
72 | num_output: 512
73 | kernel_size: 3
74 | pad: 1
75 | stride: 2
76 | bias_term: false
77 | }
78 | }
79 | layer {
80 | bottom: "res5a_branch2a"
81 | top: "res5a_branch2a"
82 | name: "scale5a_branch2a"
83 | type: "Scale"
84 | scale_param {
85 | bias_term: true
86 | }
87 | param {
88 | lr_mult: 0.0
89 | decay_mult: 0.0
90 | }
91 | param {
92 | lr_mult: 0.0
93 | decay_mult: 0.0
94 | }
95 | }
96 | layer {
97 | bottom: "res5a_branch2a"
98 | top: "res5a_branch2a"
99 | name: "res5a_branch2a_relu"
100 | type: "ReLU"
101 | }
102 | layer {
103 | bottom: "res5a_branch2a"
104 | top: "res5a_branch2b"
105 | name: "res5a_branch2b"
106 | type: "Convolution"
107 | param {
108 | lr_mult: 1
109 | decay_mult: 1
110 | }
111 | convolution_param {
112 | num_output: 512
113 | kernel_size: 3
114 | pad: 1
115 | stride: 1
116 | bias_term: false
117 | }
118 | }
119 | layer {
120 | bottom: "res5a_branch2b"
121 | top: "res5a_branch2b"
122 | name: "scale5a_branch2b"
123 | type: "Scale"
124 | scale_param {
125 | bias_term: true
126 | }
127 | param {
128 | lr_mult: 0.0
129 | decay_mult: 0.0
130 | }
131 | param {
132 | lr_mult: 0.0
133 | decay_mult: 0.0
134 | }
135 | }
136 | layer {
137 | bottom: "res5a_branch1"
138 | bottom: "res5a_branch2b"
139 | top: "res5a"
140 | name: "res5a"
141 | type: "Eltwise"
142 | }
143 | layer {
144 | bottom: "res5a"
145 | top: "res5a"
146 | name: "res5a_relu"
147 | type: "ReLU"
148 | }
149 | layer {
150 | bottom: "res5a"
151 | top: "res5b_branch2a"
152 | name: "res5b_branch2a"
153 | type: "Convolution"
154 | param {
155 | lr_mult: 1
156 | decay_mult: 1
157 | }
158 | convolution_param {
159 | num_output: 512
160 | kernel_size: 3
161 | pad: 1
162 | stride: 1
163 | bias_term: false
164 | }
165 | }
166 | layer {
167 | bottom: "res5b_branch2a"
168 | top: "res5b_branch2a"
169 | name: "scale5b_branch2a"
170 | type: "Scale"
171 | scale_param {
172 | bias_term: true
173 | }
174 | param {
175 | lr_mult: 0.0
176 | decay_mult: 0.0
177 | }
178 | param {
179 | lr_mult: 0.0
180 | decay_mult: 0.0
181 | }
182 | }
183 | layer {
184 | bottom: "res5b_branch2a"
185 | top: "res5b_branch2a"
186 | name: "res5b_branch2a_relu"
187 | type: "ReLU"
188 | }
189 | layer {
190 | bottom: "res5b_branch2a"
191 | top: "res5b_branch2b"
192 | name: "res5b_branch2b"
193 | type: "Convolution"
194 | param {
195 | lr_mult: 1
196 | decay_mult: 1
197 | }
198 | convolution_param {
199 | num_output: 512
200 | kernel_size: 3
201 | pad: 1
202 | stride: 1
203 | bias_term: false
204 | }
205 | }
206 | layer {
207 | bottom: "res5b_branch2b"
208 | top: "res5b_branch2b"
209 | name: "scale5b_branch2b"
210 | type: "Scale"
211 | scale_param {
212 | bias_term: true
213 | }
214 | param {
215 | lr_mult: 0.0
216 | decay_mult: 0.0
217 | }
218 | param {
219 | lr_mult: 0.0
220 | decay_mult: 0.0
221 | }
222 | }
223 | layer {
224 | bottom: "res5a"
225 | bottom: "res5b_branch2b"
226 | top: "res5b"
227 | name: "res5b"
228 | type: "Eltwise"
229 | }
230 | layer {
231 | bottom: "res5b"
232 | top: "res5b"
233 | name: "res5b_relu"
234 | type: "ReLU"
235 | }
236 | layer {
237 | bottom: "res5b"
238 | top: "pool5"
239 | name: "pool5"
240 | type: "Pooling"
241 | pooling_param {
242 | global_pooling: true
243 | pool: AVE
244 | }
245 | }
246 | layer {
247 | name: "cls_score"
248 | type: "InnerProduct"
249 | bottom: "pool5"
250 | top: "cls_score"
251 | param {
252 | lr_mult: 1
253 | decay_mult: 1
254 | }
255 | param {
256 | lr_mult: 2
257 | decay_mult: 0
258 | }
259 | inner_product_param {
260 | num_output: 21
261 | weight_filler {
262 | type: "msra"
263 | std: 0.01
264 | }
265 | bias_filler {
266 | type: "constant"
267 | value: 0
268 | }
269 | }
270 | }
271 | layer {
272 | name: "bbox_pred"
273 | type: "InnerProduct"
274 | bottom: "pool5"
275 | top: "bbox_pred"
276 | param {
277 | lr_mult: 1
278 | decay_mult: 1
279 | }
280 | param {
281 | lr_mult: 2
282 | decay_mult: 0
283 | }
284 | inner_product_param {
285 | num_output: 84
286 | weight_filler {
287 | type: "msra"
288 | std: 0.01
289 | }
290 | bias_filler {
291 | type: "constant"
292 | value: 0
293 | }
294 | }
295 | }
296 | layer {
297 | name: "cls_prob"
298 | type: "Softmax"
299 | bottom: "cls_score"
300 | top: "cls_prob"
301 | }
302 |
303 |
304 |
--------------------------------------------------------------------------------
/cls/vgg/deploy_vgg13-pytorch.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 |
9 | layer {
10 | name: "conv1_1"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv1_1"
14 | convolution_param {
15 | bias_term: true
16 | num_output: 64
17 | pad: 1
18 | kernel_size: 3
19 | stride: 1
20 | }
21 | }
22 |
23 | layer {
24 | name: "relu1_1"
25 | type: "ReLU"
26 | bottom: "conv1_1"
27 | top: "conv1_1"
28 | }
29 |
30 | layer {
31 | name: "conv1_2"
32 | type: "Convolution"
33 | bottom: "conv1_1"
34 | top: "conv1_2"
35 | convolution_param {
36 | bias_term: true
37 | num_output: 64
38 | pad: 1
39 | kernel_size: 3
40 | stride: 1
41 | }
42 | }
43 |
44 | layer {
45 | name: "relu1_2"
46 | type: "ReLU"
47 | bottom: "conv1_2"
48 | top: "conv1_2"
49 | }
50 |
51 | layer {
52 | name: "pool1"
53 | type: "Pooling"
54 | bottom: "conv1_2"
55 | top: "pool1"
56 | pooling_param {
57 | pool: MAX
58 | kernel_size: 2
59 | stride: 2
60 | }
61 | }
62 |
63 | layer {
64 | name: "conv2_1"
65 | type: "Convolution"
66 | bottom: "pool1"
67 | top: "conv2_1"
68 | convolution_param {
69 | bias_term: true
70 | num_output: 128
71 | pad: 1
72 | kernel_size: 3
73 | stride: 1
74 | }
75 | }
76 |
77 | layer {
78 | name: "relu2_1"
79 | type: "ReLU"
80 | bottom: "conv2_1"
81 | top: "conv2_1"
82 | }
83 |
84 | layer {
85 | name: "conv2_2"
86 | type: "Convolution"
87 | bottom: "conv2_1"
88 | top: "conv2_2"
89 | convolution_param {
90 | bias_term: true
91 | num_output: 128
92 | pad: 1
93 | kernel_size: 3
94 | stride: 1
95 | }
96 | }
97 |
98 | layer {
99 | name: "relu2_2"
100 | type: "ReLU"
101 | bottom: "conv2_2"
102 | top: "conv2_2"
103 | }
104 |
105 | layer {
106 | name: "pool2"
107 | type: "Pooling"
108 | bottom: "conv2_2"
109 | top: "pool2"
110 | pooling_param {
111 | pool: MAX
112 | kernel_size: 2
113 | stride: 2
114 | }
115 | }
116 |
117 | layer {
118 | name: "conv3_1"
119 | type: "Convolution"
120 | bottom: "pool2"
121 | top: "conv3_1"
122 | convolution_param {
123 | bias_term: true
124 | num_output: 256
125 | pad: 1
126 | kernel_size: 3
127 | stride: 1
128 | }
129 | }
130 |
131 | layer {
132 | name: "relu3_1"
133 | type: "ReLU"
134 | bottom: "conv3_1"
135 | top: "conv3_1"
136 | }
137 |
138 | layer {
139 | name: "conv3_2"
140 | type: "Convolution"
141 | bottom: "conv3_1"
142 | top: "conv3_2"
143 | convolution_param {
144 | bias_term: true
145 | num_output: 256
146 | pad: 1
147 | kernel_size: 3
148 | stride: 1
149 | }
150 | }
151 |
152 | layer {
153 | name: "relu3_2"
154 | type: "ReLU"
155 | bottom: "conv3_2"
156 | top: "conv3_2"
157 | }
158 |
159 | layer {
160 | name: "pool3"
161 | type: "Pooling"
162 | bottom: "conv3_2"
163 | top: "pool3"
164 | pooling_param {
165 | pool: MAX
166 | kernel_size: 2
167 | stride: 2
168 | }
169 | }
170 |
171 | layer {
172 | name: "conv4_1"
173 | type: "Convolution"
174 | bottom: "pool3"
175 | top: "conv4_1"
176 | convolution_param {
177 | bias_term: true
178 | num_output: 512
179 | pad: 1
180 | kernel_size: 3
181 | stride: 1
182 | }
183 | }
184 |
185 | layer {
186 | name: "relu4_1"
187 | type: "ReLU"
188 | bottom: "conv4_1"
189 | top: "conv4_1"
190 | }
191 |
192 | layer {
193 | name: "conv4_2"
194 | type: "Convolution"
195 | bottom: "conv4_1"
196 | top: "conv4_2"
197 | convolution_param {
198 | bias_term: true
199 | num_output: 512
200 | pad: 1
201 | kernel_size: 3
202 | stride: 1
203 | }
204 | }
205 |
206 | layer {
207 | name: "relu4_2"
208 | type: "ReLU"
209 | bottom: "conv4_2"
210 | top: "conv4_2"
211 | }
212 |
213 | layer {
214 | name: "pool4"
215 | type: "Pooling"
216 | bottom: "conv4_2"
217 | top: "pool4"
218 | pooling_param {
219 | pool: MAX
220 | kernel_size: 2
221 | stride: 2
222 | }
223 | }
224 |
225 | layer {
226 | name: "conv5_1"
227 | type: "Convolution"
228 | bottom: "pool4"
229 | top: "conv5_1"
230 | convolution_param {
231 | bias_term: true
232 | num_output: 512
233 | pad: 1
234 | kernel_size: 3
235 | stride: 1
236 | }
237 | }
238 |
239 | layer {
240 | name: "relu5_1"
241 | type: "ReLU"
242 | bottom: "conv5_1"
243 | top: "conv5_1"
244 | }
245 |
246 | layer {
247 | name: "conv5_2"
248 | type: "Convolution"
249 | bottom: "conv5_1"
250 | top: "conv5_2"
251 | convolution_param {
252 | bias_term: true
253 | num_output: 512
254 | pad: 1
255 | kernel_size: 3
256 | stride: 1
257 | }
258 | }
259 |
260 | layer {
261 | name: "relu5_2"
262 | type: "ReLU"
263 | bottom: "conv5_2"
264 | top: "conv5_2"
265 | }
266 |
267 | layer {
268 | name: "pool5"
269 | type: "Pooling"
270 | bottom: "conv5_2"
271 | top: "pool5"
272 | pooling_param {
273 | pool: MAX
274 | kernel_size: 2
275 | stride: 2
276 | }
277 | }
278 |
279 | layer {
280 | bottom: "pool5"
281 | top: "fc6"
282 | name: "fc6"
283 | type: "InnerProduct"
284 | inner_product_param {
285 | num_output: 4096
286 | }
287 | }
288 |
289 | layer {
290 | name: "relu6"
291 | type: "ReLU"
292 | bottom: "fc6"
293 | top: "fc6"
294 | }
295 |
296 | layer {
297 | name: "dropout6"
298 | type: "Dropout"
299 | bottom: "fc6"
300 | top: "fc6"
301 | dropout_param {
302 | dropout_ratio: 0.5
303 | }
304 | }
305 |
306 | layer {
307 | bottom: "fc6"
308 | top: "fc7"
309 | name: "fc7"
310 | type: "InnerProduct"
311 | inner_product_param {
312 | num_output: 4096
313 | }
314 | }
315 |
316 | layer {
317 | name: "relu7"
318 | type: "ReLU"
319 | bottom: "fc7"
320 | top: "fc7"
321 | }
322 |
323 | layer {
324 | name: "dropout7"
325 | type: "Dropout"
326 | bottom: "fc7"
327 | top: "fc7"
328 | dropout_param {
329 | dropout_ratio: 0.5
330 | }
331 | }
332 |
333 | layer {
334 | bottom: "fc7"
335 | top: "classifier"
336 | name: "classifier"
337 | type: "InnerProduct"
338 | inner_product_param {
339 | num_output: 1000
340 | }
341 | }
342 |
343 | layer {
344 | name: "prob"
345 | type: "Softmax"
346 | bottom: "classifier"
347 | top: "prob"
348 | }
349 |
--------------------------------------------------------------------------------
/seg/evaluation_seg.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append('/home/prmct/workspace/PSPNet-0120/python/')
4 |
5 | import caffe
6 | import cv2
7 | import numpy as np
8 | import datetime
9 |
10 | gpu_mode = True
11 | gpu_id = 3
12 | data_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/JPEGImages/'
13 | val_file = 'test_205.txt'
14 | save_root = './predict205_40000_ms/'
15 | model_weights = 'psp_resnext101_32x4d_coco_sbd_iter_40000.caffemodel'
16 | model_deploy = 'deploy_psp_resnext101_32x4d_merge_bn_scale.prototxt'
17 | prob_layer = 'prob' # output layer, normally Softmax
18 | class_num = 21
19 | base_size = 512
20 | crop_size = 473
21 | raw_scale = 57.5  # image scale divisor, e.g. 1.0 or 128.0 (57.5 pairs with the pytorch-style mean_value below)
22 | # mean_value = np.array([104.008, 116.669, 122.675])
23 | # mean_value = np.array([128, 128, 128])
24 | mean_value = np.array([103.52, 116.28, 123.675])
25 | scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # multi scale
26 | # scale_array = [1.0] # single scale
27 | flip = True
28 | class_offset = 0
29 | crf = False
30 | crf_deploy = '/home/prmct/Program/segmentation/deploy_crf.prototxt'
31 | crf_factor = 4.0
32 |
33 | if gpu_mode:
34 | caffe.set_mode_gpu()
35 | caffe.set_device(gpu_id)
36 | else:
37 | caffe.set_mode_cpu()
38 | net = caffe.Net(model_deploy, model_weights, caffe.TEST)
39 |
40 | if crf:
41 | net_crf = caffe.Net(crf_deploy, caffe.TEST)
42 |
43 |
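# Multi-scale evaluation: resize each image so its long side is roughly
# base_size * scale for every scale in scale_array, run sliding-window
# inference, average the score maps, optionally refine with a CRF, and
# save the argmax label map as a PNG.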
44 | def eval_batch():
45 | eval_images = []
46 | f = open(val_file, 'r')
47 | for i in f:
48 | eval_images.append(i.strip())
49 |
50 | skip_num = 0
51 | eval_len = len(eval_images)
52 | start_time = datetime.datetime.now()
53 | for i in xrange(eval_len - skip_num):
54 | _img = cv2.imread(data_root + eval_images[i + skip_num] + '.jpg')
55 | h, w, d = _img.shape
56 |
57 | score_map = np.zeros((h, w, class_num), dtype=np.float32)
58 | for j in scale_array:
59 | long_size = float(base_size) * j + 1
60 | ratio = long_size / max(h, w)
61 | new_size = (int(w * ratio), int(h * ratio))
62 | _scale = cv2.resize(_img, new_size)
63 | score_map += cv2.resize(scale_process(_scale), (w, h))
64 | score_map /= len(scale_array)
65 |
66 | if crf:
67 | tmp_data = np.asarray([_img.transpose(2, 0, 1)], dtype=np.float32)
68 | tmp_score = np.asarray([score_map.transpose(2, 0, 1)], dtype=np.float32)
69 | net_crf.blobs['data'].reshape(*tmp_data.shape)
70 | net_crf.blobs['data'].data[...] = tmp_data / raw_scale
71 | net_crf.blobs['data_dim'].data[...] = [[[h, w]]]
72 | net_crf.blobs['score'].reshape(*tmp_score.shape)
73 | net_crf.blobs['score'].data[...] = tmp_score * crf_factor
74 | net_crf.forward()
75 | score_map = net_crf.blobs[prob_layer].data[0].transpose(1, 2, 0)
76 |
77 | cv2.imwrite(save_root + eval_images[i + skip_num] + '.png', score_map.argmax(2) + class_offset)
78 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len) + ' ' + str(eval_images[i + skip_num])
79 | end_time = datetime.datetime.now()
80 | print '\nEvaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time))
81 | print '\n{} images have been tested. \nThe model is: {}'.format(str(eval_len), model_weights)
82 |
83 |
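# Sliding-window inference: if the rescaled image fits in the network crop,
# run it once; otherwise tile it with overlapping crop_size windows
# (stride = 2/3 of crop_size), average per-pixel scores over the overlaps,
# and crop the result back to the input extent.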
84 | def scale_process(_scale):
85 | sh, sw, sd = _scale.shape
86 | _scale = np.asarray(_scale, dtype=np.float32)
87 | long_size = max(sh, sw)
88 | short_size = min(sh, sw)
89 | if long_size <= crop_size:
90 | input_data = pad_img(_scale - mean_value)
91 | score = caffe_process(input_data)[:sh, :sw, :]
92 | else:
93 | stride_rate = 2.0 / 3
94 | stride = np.ceil(crop_size * stride_rate)
95 | _pad = _scale
96 | if short_size < crop_size:
97 | _pad = pad_img(_scale - mean_value) + mean_value
98 |
99 | ph, pw, pd = _pad.shape
100 | h_grid = int(np.ceil(float(ph - crop_size) / stride)) + 1
101 | w_grid = int(np.ceil(float(pw - crop_size) / stride)) + 1
102 | data_scale = np.zeros((ph, pw, class_num), dtype=np.float32)
103 | count_scale = np.zeros((ph, pw, class_num), dtype=np.float32)
104 | for grid_yidx in xrange(0, h_grid):
105 | for grid_xidx in xrange(0, w_grid):
106 | s_x = int(grid_xidx * stride)
107 | s_y = int(grid_yidx * stride)
108 | e_x = min(s_x + crop_size, pw)
109 | e_y = min(s_y + crop_size, ph)
110 | s_x = int(e_x - crop_size)
111 | s_y = int(e_y - crop_size)
112 | _sub = _pad[s_y:e_y, s_x:e_x, :]
113 | count_scale[s_y:e_y, s_x:e_x, :] += 1.0
114 | input_data = pad_img(_sub - mean_value)
115 | data_scale[s_y:e_y, s_x:e_x, :] += caffe_process(input_data)
116 | score = data_scale / count_scale
117 | score = score[:sh, :sw, :]
118 |
119 | return score
120 |
121 |
122 | def pad_img(_scale):
123 | sh, sw, sd = _scale.shape
124 | if sh < crop_size:
125 | _pad = np.zeros((crop_size, sw, sd), dtype=np.float32)
126 | _pad[:sh, :, :] = _scale
127 | _scale = _pad
128 | sh, sw, sd = _scale.shape
129 | if sw < crop_size:
130 | _pad = np.zeros((sh, crop_size, sd), dtype=np.float32)
131 | _pad[:, :sw, :] = _scale
132 | _scale = _pad
133 |
134 | return _scale
135 |
136 |
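# Forward one padded crop through the net (and, if flip is set, its
# horizontal mirror) and return the averaged class-probability map in
# HxWxC layout.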
137 | def caffe_process(_input):
138 | h, w, d = _input.shape
139 | _score = np.zeros((h, w, class_num), dtype=np.float32)
140 | if flip:
141 | _flip = _input[:, ::-1]
142 | _flip = _flip.transpose(2, 0, 1)
143 | _flip = _flip.reshape((1,) + _flip.shape)
144 | net.blobs['data'].reshape(*_flip.shape)
145 | net.blobs['data'].data[...] = _flip / raw_scale
146 | # net.blobs['data_dim'].data[...] = [[[h, w]]]
147 | net.forward()
148 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0)[:, ::-1]
149 |
150 | _input = _input.transpose(2, 0, 1)
151 | _input = _input.reshape((1,) + _input.shape)
152 | net.blobs['data'].reshape(*_input.shape)
153 | net.blobs['data'].data[...] = _input / raw_scale
154 | # net.blobs['data_dim'].data[...] = [[[h, w]]]
155 | net.forward()
156 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0)
157 |
158 | return _score / int(flip + 1)
159 |
160 | if __name__ == '__main__':
161 | eval_batch()
162 |
163 |
--------------------------------------------------------------------------------
/cls/vgg/deploy_vgg16-5x.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 | layer {
9 | name: "conv1_1"
10 | type: "Convolution"
11 | bottom: "data"
12 | top: "conv1_1"
13 | convolution_param {
14 | num_output: 24
15 | pad: 1
16 | kernel_size: 3
17 | }
18 | }
19 | layer {
20 | name: "relu1_1"
21 | type: "ReLU"
22 | bottom: "conv1_1"
23 | top: "conv1_1"
24 | }
25 | layer {
26 | name: "conv1_2"
27 | type: "Convolution"
28 | bottom: "conv1_1"
29 | top: "conv1_2"
30 | convolution_param {
31 | num_output: 22
32 | pad: 1
33 | kernel_size: 3
34 | }
35 | }
36 | layer {
37 | name: "relu1_2"
38 | type: "ReLU"
39 | bottom: "conv1_2"
40 | top: "conv1_2"
41 | }
42 | layer {
43 | name: "pool1"
44 | type: "Pooling"
45 | bottom: "conv1_2"
46 | top: "pool1"
47 | pooling_param {
48 | pool: MAX
49 | kernel_size: 2
50 | stride: 2
51 | }
52 | }
53 | layer {
54 | name: "conv2_1"
55 | type: "Convolution"
56 | bottom: "pool1"
57 | top: "conv2_1"
58 | convolution_param {
59 | num_output: 41
60 | pad: 1
61 | kernel_size: 3
62 | }
63 | }
64 | layer {
65 | name: "relu2_1"
66 | type: "ReLU"
67 | bottom: "conv2_1"
68 | top: "conv2_1"
69 | }
70 | layer {
71 | name: "conv2_2"
72 | type: "Convolution"
73 | bottom: "conv2_1"
74 | top: "conv2_2"
75 | convolution_param {
76 | num_output: 51
77 | pad: 1
78 | kernel_size: 3
79 | }
80 | }
81 | layer {
82 | name: "relu2_2"
83 | type: "ReLU"
84 | bottom: "conv2_2"
85 | top: "conv2_2"
86 | }
87 | layer {
88 | name: "pool2"
89 | type: "Pooling"
90 | bottom: "conv2_2"
91 | top: "pool2"
92 | pooling_param {
93 | pool: MAX
94 | kernel_size: 2
95 | stride: 2
96 | }
97 | }
98 | layer {
99 | name: "conv3_1"
100 | type: "Convolution"
101 | bottom: "pool2"
102 | top: "conv3_1"
103 | convolution_param {
104 | num_output: 108
105 | pad: 1
106 | kernel_size: 3
107 | }
108 | }
109 | layer {
110 | name: "relu3_1"
111 | type: "ReLU"
112 | bottom: "conv3_1"
113 | top: "conv3_1"
114 | }
115 | layer {
116 | name: "conv3_2"
117 | type: "Convolution"
118 | bottom: "conv3_1"
119 | top: "conv3_2"
120 | convolution_param {
121 | num_output: 89
122 | pad: 1
123 | kernel_size: 3
124 | }
125 | }
126 | layer {
127 | name: "relu3_2"
128 | type: "ReLU"
129 | bottom: "conv3_2"
130 | top: "conv3_2"
131 | }
132 | layer {
133 | name: "conv3_3"
134 | type: "Convolution"
135 | bottom: "conv3_2"
136 | top: "conv3_3"
137 | convolution_param {
138 | num_output: 111
139 | pad: 1
140 | kernel_size: 3
141 | }
142 | }
143 | layer {
144 | name: "relu3_3"
145 | type: "ReLU"
146 | bottom: "conv3_3"
147 | top: "conv3_3"
148 | }
149 | layer {
150 | name: "pool3"
151 | type: "Pooling"
152 | bottom: "conv3_3"
153 | top: "pool3"
154 | pooling_param {
155 | pool: MAX
156 | kernel_size: 2
157 | stride: 2
158 | }
159 | }
160 | layer {
161 | name: "conv4_1"
162 | type: "Convolution"
163 | bottom: "pool3"
164 | top: "conv4_1"
165 | convolution_param {
166 | num_output: 184
167 | pad: 1
168 | kernel_size: 3
169 | }
170 | }
171 | layer {
172 | name: "relu4_1"
173 | type: "ReLU"
174 | bottom: "conv4_1"
175 | top: "conv4_1"
176 | }
177 | layer {
178 | name: "conv4_2"
179 | type: "Convolution"
180 | bottom: "conv4_1"
181 | top: "conv4_2"
182 | convolution_param {
183 | num_output: 276
184 | pad: 1
185 | kernel_size: 3
186 | }
187 | }
188 | layer {
189 | name: "relu4_2"
190 | type: "ReLU"
191 | bottom: "conv4_2"
192 | top: "conv4_2"
193 | }
194 | layer {
195 | name: "conv4_3"
196 | type: "Convolution"
197 | bottom: "conv4_2"
198 | top: "conv4_3"
199 | convolution_param {
200 | num_output: 228
201 | pad: 1
202 | kernel_size: 3
203 | }
204 | }
205 | layer {
206 | name: "relu4_3"
207 | type: "ReLU"
208 | bottom: "conv4_3"
209 | top: "conv4_3"
210 | }
211 | layer {
212 | name: "pool4"
213 | type: "Pooling"
214 | bottom: "conv4_3"
215 | top: "pool4"
216 | pooling_param {
217 | pool: MAX
218 | kernel_size: 2
219 | stride: 2
220 | }
221 | }
222 | layer {
223 | name: "conv5_1"
224 | type: "Convolution"
225 | bottom: "pool4"
226 | top: "conv5_1"
227 | convolution_param {
228 | num_output: 512
229 | pad: 1
230 | kernel_size: 3
231 | }
232 | }
233 | layer {
234 | name: "relu5_1"
235 | type: "ReLU"
236 | bottom: "conv5_1"
237 | top: "conv5_1"
238 | }
239 | layer {
240 | name: "conv5_2"
241 | type: "Convolution"
242 | bottom: "conv5_1"
243 | top: "conv5_2"
244 | convolution_param {
245 | num_output: 512
246 | pad: 1
247 | kernel_size: 3
248 | }
249 | }
250 | layer {
251 | name: "relu5_2"
252 | type: "ReLU"
253 | bottom: "conv5_2"
254 | top: "conv5_2"
255 | }
256 | layer {
257 | name: "conv5_3"
258 | type: "Convolution"
259 | bottom: "conv5_2"
260 | top: "conv5_3"
261 | convolution_param {
262 | num_output: 512
263 | pad: 1
264 | kernel_size: 3
265 | }
266 | }
267 | layer {
268 | name: "relu5_3"
269 | type: "ReLU"
270 | bottom: "conv5_3"
271 | top: "conv5_3"
272 | }
273 | layer {
274 | name: "pool5"
275 | type: "Pooling"
276 | bottom: "conv5_3"
277 | top: "pool5"
278 | pooling_param {
279 | pool: MAX
280 | kernel_size: 2
281 | stride: 2
282 | }
283 | }
284 | layer {
285 | name: "fc6"
286 | type: "InnerProduct"
287 | bottom: "pool5"
288 | top: "fc6"
289 | inner_product_param {
290 | num_output: 4096
291 | }
292 | }
293 | layer {
294 | name: "relu6"
295 | type: "ReLU"
296 | bottom: "fc6"
297 | top: "fc6"
298 | }
299 | layer {
300 | name: "drop6"
301 | type: "Dropout"
302 | bottom: "fc6"
303 | top: "fc6"
304 | dropout_param {
305 | dropout_ratio: 0.5
306 | }
307 | }
308 | layer {
309 | name: "fc7"
310 | type: "InnerProduct"
311 | bottom: "fc6"
312 | top: "fc7"
313 | inner_product_param {
314 | num_output: 4096
315 | }
316 | }
317 | layer {
318 | name: "relu7"
319 | type: "ReLU"
320 | bottom: "fc7"
321 | top: "fc7"
322 | }
323 | layer {
324 | name: "drop7"
325 | type: "Dropout"
326 | bottom: "fc7"
327 | top: "fc7"
328 | dropout_param {
329 | dropout_ratio: 0.5
330 | }
331 | }
332 | layer {
333 | name: "fc8"
334 | type: "InnerProduct"
335 | bottom: "fc7"
336 | top: "fc8"
337 | inner_product_param {
338 | num_output: 1000
339 | }
340 | }
341 | layer {
342 | name: "prob"
343 | type: "Softmax"
344 | bottom: "fc8"
345 | top: "prob"
346 | }
347 |
348 |
--------------------------------------------------------------------------------
/cls/vgg/deploy_vgg16-dsd.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 | layer {
9 | bottom: "data"
10 | top: "conv1_1"
11 | name: "conv1_1"
12 | type: "Convolution"
13 | convolution_param {
14 | num_output: 64
15 | pad: 1
16 | kernel_size: 3
17 | }
18 | }
19 | layer {
20 | bottom: "conv1_1"
21 | top: "conv1_1"
22 | name: "relu1_1"
23 | type: "ReLU"
24 | }
25 | layer {
26 | bottom: "conv1_1"
27 | top: "conv1_2"
28 | name: "conv1_2"
29 | type: "Convolution"
30 | convolution_param {
31 | num_output: 64
32 | pad: 1
33 | kernel_size: 3
34 | }
35 | }
36 | layer {
37 | bottom: "conv1_2"
38 | top: "conv1_2"
39 | name: "relu1_2"
40 | type: "ReLU"
41 | }
42 | layer {
43 | bottom: "conv1_2"
44 | top: "pool1"
45 | name: "pool1"
46 | type: "Pooling"
47 | pooling_param {
48 | pool: MAX
49 | kernel_size: 2
50 | stride: 2
51 | }
52 | }
53 | layer {
54 | bottom: "pool1"
55 | top: "conv2_1"
56 | name: "conv2_1"
57 | type: "Convolution"
58 | convolution_param {
59 | num_output: 128
60 | pad: 1
61 | kernel_size: 3
62 | }
63 | }
64 | layer {
65 | bottom: "conv2_1"
66 | top: "conv2_1"
67 | name: "relu2_1"
68 | type: "ReLU"
69 | }
70 | layer {
71 | bottom: "conv2_1"
72 | top: "conv2_2"
73 | name: "conv2_2"
74 | type: "Convolution"
75 | convolution_param {
76 | num_output: 128
77 | pad: 1
78 | kernel_size: 3
79 | }
80 | }
81 | layer {
82 | bottom: "conv2_2"
83 | top: "conv2_2"
84 | name: "relu2_2"
85 | type: "ReLU"
86 | }
87 | layer {
88 | bottom: "conv2_2"
89 | top: "pool2"
90 | name: "pool2"
91 | type: "Pooling"
92 | pooling_param {
93 | pool: MAX
94 | kernel_size: 2
95 | stride: 2
96 | }
97 | }
98 | layer {
99 | bottom: "pool2"
100 | top: "conv3_1"
101 | name: "conv3_1"
102 | type: "Convolution"
103 | convolution_param {
104 | num_output: 256
105 | pad: 1
106 | kernel_size: 3
107 | }
108 | }
109 | layer {
110 | bottom: "conv3_1"
111 | top: "conv3_1"
112 | name: "relu3_1"
113 | type: "ReLU"
114 | }
115 | layer {
116 | bottom: "conv3_1"
117 | top: "conv3_2"
118 | name: "conv3_2"
119 | type: "Convolution"
120 | convolution_param {
121 | num_output: 256
122 | pad: 1
123 | kernel_size: 3
124 | }
125 | }
126 | layer {
127 | bottom: "conv3_2"
128 | top: "conv3_2"
129 | name: "relu3_2"
130 | type: "ReLU"
131 | }
132 | layer {
133 | bottom: "conv3_2"
134 | top: "conv3_3"
135 | name: "conv3_3"
136 | type: "Convolution"
137 | convolution_param {
138 | num_output: 256
139 | pad: 1
140 | kernel_size: 3
141 | }
142 | }
143 | layer {
144 | bottom: "conv3_3"
145 | top: "conv3_3"
146 | name: "relu3_3"
147 | type: "ReLU"
148 | }
149 | layer {
150 | bottom: "conv3_3"
151 | top: "pool3"
152 | name: "pool3"
153 | type: "Pooling"
154 | pooling_param {
155 | pool: MAX
156 | kernel_size: 2
157 | stride: 2
158 | }
159 | }
160 | layer {
161 | bottom: "pool3"
162 | top: "conv4_1"
163 | name: "conv4_1"
164 | type: "Convolution"
165 | convolution_param {
166 | num_output: 512
167 | pad: 1
168 | kernel_size: 3
169 | }
170 | }
171 | layer {
172 | bottom: "conv4_1"
173 | top: "conv4_1"
174 | name: "relu4_1"
175 | type: "ReLU"
176 | }
177 | layer {
178 | bottom: "conv4_1"
179 | top: "conv4_2"
180 | name: "conv4_2"
181 | type: "Convolution"
182 | convolution_param {
183 | num_output: 512
184 | pad: 1
185 | kernel_size: 3
186 | }
187 | }
188 | layer {
189 | bottom: "conv4_2"
190 | top: "conv4_2"
191 | name: "relu4_2"
192 | type: "ReLU"
193 | }
194 | layer {
195 | bottom: "conv4_2"
196 | top: "conv4_3"
197 | name: "conv4_3"
198 | type: "Convolution"
199 | convolution_param {
200 | num_output: 512
201 | pad: 1
202 | kernel_size: 3
203 | }
204 | }
205 | layer {
206 | bottom: "conv4_3"
207 | top: "conv4_3"
208 | name: "relu4_3"
209 | type: "ReLU"
210 | }
211 | layer {
212 | bottom: "conv4_3"
213 | top: "pool4"
214 | name: "pool4"
215 | type: "Pooling"
216 | pooling_param {
217 | pool: MAX
218 | kernel_size: 2
219 | stride: 2
220 | }
221 | }
222 | layer {
223 | bottom: "pool4"
224 | top: "conv5_1"
225 | name: "conv5_1"
226 | type: "Convolution"
227 | convolution_param {
228 | num_output: 512
229 | pad: 1
230 | kernel_size: 3
231 | }
232 | }
233 | layer {
234 | bottom: "conv5_1"
235 | top: "conv5_1"
236 | name: "relu5_1"
237 | type: "ReLU"
238 | }
239 | layer {
240 | bottom: "conv5_1"
241 | top: "conv5_2"
242 | name: "conv5_2"
243 | type: "Convolution"
244 | convolution_param {
245 | num_output: 512
246 | pad: 1
247 | kernel_size: 3
248 | }
249 | }
250 | layer {
251 | bottom: "conv5_2"
252 | top: "conv5_2"
253 | name: "relu5_2"
254 | type: "ReLU"
255 | }
256 | layer {
257 | bottom: "conv5_2"
258 | top: "conv5_3"
259 | name: "conv5_3"
260 | type: "Convolution"
261 | convolution_param {
262 | num_output: 512
263 | pad: 1
264 | kernel_size: 3
265 | }
266 | }
267 | layer {
268 | bottom: "conv5_3"
269 | top: "conv5_3"
270 | name: "relu5_3"
271 | type: "ReLU"
272 | }
273 | layer {
274 | bottom: "conv5_3"
275 | top: "pool5"
276 | name: "pool5"
277 | type: "Pooling"
278 | pooling_param {
279 | pool: MAX
280 | kernel_size: 2
281 | stride: 2
282 | }
283 | }
284 | layer {
285 | bottom: "pool5"
286 | top: "fc6"
287 | name: "fc6"
288 | type: "InnerProduct"
289 | inner_product_param {
290 | num_output: 4096
291 | }
292 | }
293 | layer {
294 | bottom: "fc6"
295 | top: "fc6"
296 | name: "relu6"
297 | type: "ReLU"
298 | }
299 | layer {
300 | bottom: "fc6"
301 | top: "fc6"
302 | name: "drop6"
303 | type: "Dropout"
304 | dropout_param {
305 | dropout_ratio: 0.5
306 | }
307 | }
308 | layer {
309 | bottom: "fc6"
310 | top: "fc7"
311 | name: "fc7"
312 | type: "InnerProduct"
313 | inner_product_param {
314 | num_output: 4096
315 | }
316 | }
317 | layer {
318 | bottom: "fc7"
319 | top: "fc7"
320 | name: "relu7"
321 | type: "ReLU"
322 | }
323 | layer {
324 | bottom: "fc7"
325 | top: "fc7"
326 | name: "drop7"
327 | type: "Dropout"
328 | dropout_param {
329 | dropout_ratio: 0.5
330 | }
331 | }
332 | layer {
333 | bottom: "fc7"
334 | top: "fc8"
335 | name: "fc8"
336 | type: "InnerProduct"
337 | inner_product_param {
338 | num_output: 1000
339 | }
340 | }
341 | layer {
342 | name: "prob"
343 | type: "Softmax"
344 | bottom: "fc8"
345 | top: "prob"
346 | }
347 |
348 |
--------------------------------------------------------------------------------
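Note: a deploy definition like the VGG-16 prototxt above only describes the graph; trained weights live in a separate .caffemodel. Below is a minimal single-image inference sketch with pycaffe. The prototxt/caffemodel file names and the test image are placeholders, and the BGR mean is the one used by cls/evaluation_cls.py later in this dump.

import numpy as np
import cv2
import caffe

# Hypothetical paths -- substitute your own deploy/weights pair.
net = caffe.Net('deploy_vgg16.prototxt', 'vgg16.caffemodel', caffe.TEST)

img = cv2.imread('cat.jpg')                        # OpenCV loads BGR, which Caffe expects
img = cv2.resize(img, (224, 224)).astype(np.float32)
img -= np.array([102.9801, 115.9465, 122.7717])    # BGR mean from cls/evaluation_cls.py
img = img.transpose(2, 0, 1)[np.newaxis, ...]      # HWC -> NCHW
net.blobs['data'].data[...] = img
prob = net.forward()['prob'][0]
print('top-1 class: {}, prob: {:.4f}'.format(prob.argmax(), prob.max()))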
/det/rfcn/tools/score.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xml.etree.ElementTree as ET
3 |
4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/'
5 | val_file = '~/2007test.txt'
6 | det_root = '~/predict_ss/'
7 |
8 | _classes = ('__background__', # always index 0
9 | 'aeroplane', 'bicycle', 'bird', 'boat',
10 | 'bottle', 'bus', 'car', 'cat', 'chair',
11 | 'cow', 'diningtable', 'dog', 'horse',
12 | 'motorbike', 'person', 'pottedplant',
13 | 'sheep', 'sofa', 'train', 'tvmonitor')
14 |
15 | def parse_rec(filename):
16 | """ Parse a PASCAL VOC xml file """
17 | tree = ET.parse(filename)
18 | objects = []
19 | for obj in tree.findall('object'):
20 | obj_struct = {}
21 | obj_struct['name'] = obj.find('name').text
22 | obj_struct['pose'] = obj.find('pose').text
23 | obj_struct['truncated'] = int(obj.find('truncated').text)
24 | obj_struct['difficult'] = int(obj.find('difficult').text)
25 | bbox = obj.find('bndbox')
26 | obj_struct['bbox'] = [int(bbox.find('xmin').text),
27 | int(bbox.find('ymin').text),
28 | int(bbox.find('xmax').text),
29 | int(bbox.find('ymax').text)]
30 | objects.append(obj_struct)
31 |
32 | return objects
33 |
34 | def voc_ap(rec, prec, use_07_metric=False):
35 | """ ap = voc_ap(rec, prec, [use_07_metric])
36 | Compute VOC AP given precision and recall.
37 | If use_07_metric is true, uses the
38 | VOC 07 11 point method (default:False).
39 | """
40 | if use_07_metric:
41 | # 11 point metric
42 | ap = 0.
43 | for t in np.arange(0., 1.1, 0.1):
44 | if np.sum(rec >= t) == 0:
45 | p = 0
46 | else:
47 | p = np.max(prec[rec >= t])
48 | ap = ap + p / 11.
49 | else:
50 | # correct AP calculation
51 | # first append sentinel values at the end
52 | mrec = np.concatenate(([0.], rec, [1.]))
53 | mpre = np.concatenate(([0.], prec, [0.]))
54 |
55 | # compute the precision envelope
56 | for i in range(mpre.size - 1, 0, -1):
57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
58 |
59 | # to calculate area under PR curve, look for points
60 | # where X axis (recall) changes value
61 | i = np.where(mrec[1:] != mrec[:-1])[0]
62 |
63 | # and sum (\Delta recall) * prec
64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
65 | return ap
66 |
67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False):
68 |
69 | eval_images = []
70 |     with open(val_file, 'r') as f:
71 |         for i in f:
72 |             eval_images.append(i.strip())
73 |
74 | recs = {}
75 | for imagename in eval_images:
76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml')
77 |
78 | class_recs = {}
79 |
80 | npos = 0
81 | for imagename in eval_images:
82 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
83 | bbox = np.array([x['bbox'] for x in R])
84 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)  # np.bool was removed in NumPy 1.24
85 | det = [False] * len(R)
86 | npos = npos + sum(~difficult)
87 | class_recs[imagename] = {'bbox': bbox,
88 | 'difficult': difficult,
89 | 'det': det}
90 |     detfile = filename  # use the per-class detection file the caller built
91 | with open(detfile, 'r') as f:
92 | lines = f.readlines()
93 | splitlines = [x.strip().split(' ') for x in lines]
94 | image_ids = [x[0] for x in splitlines]
95 | confidence = np.array([float(x[1]) for x in splitlines])
96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
97 |
98 | # sort by confidence
99 | sorted_ind = np.argsort(-confidence)
100 | sorted_scores = np.sort(-confidence)
101 | BB = BB[sorted_ind, :]
102 | image_ids = [image_ids[x] for x in sorted_ind]
103 |
104 | # go down dets and mark TPs and FPs
105 | nd = len(image_ids)
106 | tp = np.zeros(nd)
107 | fp = np.zeros(nd)
108 | for d in range(nd):
109 | R = class_recs[image_ids[d]]
110 | bb = BB[d, :].astype(float)
111 | ovmax = -np.inf
112 | BBGT = R['bbox'].astype(float)
113 |
114 | if BBGT.size > 0:
115 | # compute overlaps
116 | # intersection
117 | ixmin = np.maximum(BBGT[:, 0], bb[0])
118 | iymin = np.maximum(BBGT[:, 1], bb[1])
119 | ixmax = np.minimum(BBGT[:, 2], bb[2])
120 | iymax = np.minimum(BBGT[:, 3], bb[3])
121 | iw = np.maximum(ixmax - ixmin + 1., 0.)
122 | ih = np.maximum(iymax - iymin + 1., 0.)
123 | inters = iw * ih
124 |
125 | # union
126 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
129 |
130 | overlaps = inters / uni
131 | ovmax = np.max(overlaps)
132 | jmax = np.argmax(overlaps)
133 |
134 | if ovmax > ovthresh:
135 | if not R['difficult'][jmax]:
136 | if not R['det'][jmax]:
137 | tp[d] = 1.
138 | R['det'][jmax] = 1
139 | else:
140 | fp[d] = 1.
141 | else:
142 | fp[d] = 1.
143 |
144 | # compute precision recall
145 | fp = np.cumsum(fp)
146 | tp = np.cumsum(tp)
147 | rec = tp / float(npos)
148 | # avoid divide by zero in case the first detection matches a difficult
149 | # ground truth
150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
151 | ap = voc_ap(rec, prec, use_07_metric)
152 |
153 | return rec, prec, ap
154 |
155 | def compute_ap():
156 |
157 | aps = []
158 | for i, cls in enumerate(_classes):
159 | if cls == '__background__':
160 | continue
161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt'
162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True)
163 |
164 | aps += [ap]
165 | print('AP for {} = {:.4f}'.format(cls, ap))
166 |
167 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
168 | print('~~~~~~~~')
169 | print('Results:')
170 |
171 | for ap in aps:
172 | print('{:.3f}'.format(ap))
173 | print('{:.3f}'.format(np.mean(aps)))
174 | print('~~~~~~~~')
175 |
176 | if __name__ == '__main__':
177 | compute_ap()
178 |
179 |
180 |
181 |
182 |
183 |
--------------------------------------------------------------------------------
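A quick numeric check of the two AP modes implemented by voc_ap() above, with made-up values: three detections sorted by confidence, two of which are true positives out of npos = 2 ground truths.

import numpy as np
from score import voc_ap  # assumes this runs next to tools/score.py above

rec = np.array([0.5, 0.5, 1.0])        # cumulative recall after each detection
prec = np.array([1.0, 0.5, 2.0 / 3])   # cumulative precision after each detection

# VOC07 11-point metric: mean of max precision at recall thresholds 0.0 .. 1.0.
# Six thresholds (0.0-0.5) see p = 1.0, five (0.6-1.0) see p = 2/3:
print(voc_ap(rec, prec, use_07_metric=True))    # (6*1.0 + 5*2/3) / 11 ~= 0.8485

# Area-under-curve metric: 0.5*1.0 + 0.5*(2/3) ~= 0.8333
print(voc_ap(rec, prec, use_07_metric=False))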
/det/faster_rcnn/tools/score.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xml.etree.ElementTree as ET
3 |
4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/'
5 | val_file = '~/2007test.txt'
6 | det_root = '~/predict_ss/'
7 |
8 | _classes = ('__background__', # always index 0
9 | 'aeroplane', 'bicycle', 'bird', 'boat',
10 | 'bottle', 'bus', 'car', 'cat', 'chair',
11 | 'cow', 'diningtable', 'dog', 'horse',
12 | 'motorbike', 'person', 'pottedplant',
13 | 'sheep', 'sofa', 'train', 'tvmonitor')
14 |
15 | def parse_rec(filename):
16 | """ Parse a PASCAL VOC xml file """
17 | tree = ET.parse(filename)
18 | objects = []
19 | for obj in tree.findall('object'):
20 | obj_struct = {}
21 | obj_struct['name'] = obj.find('name').text
22 | obj_struct['pose'] = obj.find('pose').text
23 | obj_struct['truncated'] = int(obj.find('truncated').text)
24 | obj_struct['difficult'] = int(obj.find('difficult').text)
25 | bbox = obj.find('bndbox')
26 | obj_struct['bbox'] = [int(bbox.find('xmin').text),
27 | int(bbox.find('ymin').text),
28 | int(bbox.find('xmax').text),
29 | int(bbox.find('ymax').text)]
30 | objects.append(obj_struct)
31 |
32 | return objects
33 |
34 | def voc_ap(rec, prec, use_07_metric=False):
35 | """ ap = voc_ap(rec, prec, [use_07_metric])
36 | Compute VOC AP given precision and recall.
37 | If use_07_metric is true, uses the
38 | VOC 07 11 point method (default:False).
39 | """
40 | if use_07_metric:
41 | # 11 point metric
42 | ap = 0.
43 | for t in np.arange(0., 1.1, 0.1):
44 | if np.sum(rec >= t) == 0:
45 | p = 0
46 | else:
47 | p = np.max(prec[rec >= t])
48 | ap = ap + p / 11.
49 | else:
50 | # correct AP calculation
51 | # first append sentinel values at the end
52 | mrec = np.concatenate(([0.], rec, [1.]))
53 | mpre = np.concatenate(([0.], prec, [0.]))
54 |
55 | # compute the precision envelope
56 | for i in range(mpre.size - 1, 0, -1):
57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
58 |
59 | # to calculate area under PR curve, look for points
60 | # where X axis (recall) changes value
61 | i = np.where(mrec[1:] != mrec[:-1])[0]
62 |
63 | # and sum (\Delta recall) * prec
64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
65 | return ap
66 |
67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False):
68 |
69 | eval_images = []
70 |     with open(val_file, 'r') as f:
71 |         for i in f:
72 |             eval_images.append(i.strip())
73 |
74 | recs = {}
75 | for imagename in eval_images:
76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml')
77 |
78 | class_recs = {}
79 |
80 | npos = 0
81 | for imagename in eval_images:
82 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
83 | bbox = np.array([x['bbox'] for x in R])
84 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)  # np.bool was removed in NumPy 1.24
85 | det = [False] * len(R)
86 | npos = npos + sum(~difficult)
87 | class_recs[imagename] = {'bbox': bbox,
88 | 'difficult': difficult,
89 | 'det': det}
90 |     detfile = filename  # use the per-class detection file the caller built
91 | with open(detfile, 'r') as f:
92 | lines = f.readlines()
93 | splitlines = [x.strip().split(' ') for x in lines]
94 | image_ids = [x[0] for x in splitlines]
95 | confidence = np.array([float(x[1]) for x in splitlines])
96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
97 |
98 | # sort by confidence
99 | sorted_ind = np.argsort(-confidence)
100 | sorted_scores = np.sort(-confidence)
101 | BB = BB[sorted_ind, :]
102 | image_ids = [image_ids[x] for x in sorted_ind]
103 |
104 | # go down dets and mark TPs and FPs
105 | nd = len(image_ids)
106 | tp = np.zeros(nd)
107 | fp = np.zeros(nd)
108 | for d in range(nd):
109 | R = class_recs[image_ids[d]]
110 | bb = BB[d, :].astype(float)
111 | ovmax = -np.inf
112 | BBGT = R['bbox'].astype(float)
113 |
114 | if BBGT.size > 0:
115 | # compute overlaps
116 | # intersection
117 | ixmin = np.maximum(BBGT[:, 0], bb[0])
118 | iymin = np.maximum(BBGT[:, 1], bb[1])
119 | ixmax = np.minimum(BBGT[:, 2], bb[2])
120 | iymax = np.minimum(BBGT[:, 3], bb[3])
121 | iw = np.maximum(ixmax - ixmin + 1., 0.)
122 | ih = np.maximum(iymax - iymin + 1., 0.)
123 | inters = iw * ih
124 |
125 | # union
126 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
129 |
130 | overlaps = inters / uni
131 | ovmax = np.max(overlaps)
132 | jmax = np.argmax(overlaps)
133 |
134 | if ovmax > ovthresh:
135 | if not R['difficult'][jmax]:
136 | if not R['det'][jmax]:
137 | tp[d] = 1.
138 | R['det'][jmax] = 1
139 | else:
140 | fp[d] = 1.
141 | else:
142 | fp[d] = 1.
143 |
144 | # compute precision recall
145 | fp = np.cumsum(fp)
146 | tp = np.cumsum(tp)
147 | rec = tp / float(npos)
148 | # avoid divide by zero in case the first detection matches a difficult
149 | # ground truth
150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
151 | ap = voc_ap(rec, prec, use_07_metric)
152 |
153 | return rec, prec, ap
154 |
155 | def compute_ap():
156 |
157 | aps = []
158 | for i, cls in enumerate(_classes):
159 | if cls == '__background__':
160 | continue
161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt'
162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True)
163 |
164 | aps += [ap]
165 | print('AP for {} = {:.4f}'.format(cls, ap))
166 |
167 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
168 | print('~~~~~~~~')
169 | print('Results:')
170 |
171 | for ap in aps:
172 | print('{:.3f}'.format(ap))
173 | print('{:.3f}'.format(np.mean(aps)))
174 | print('~~~~~~~~')
175 |
176 | if __name__ == '__main__':
177 | compute_ap()
178 |
179 |
180 |
181 |
182 |
183 |
--------------------------------------------------------------------------------
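The matching loop in both score.py copies uses the VOC convention that box coordinates are inclusive pixel indices, hence the "+ 1." terms in the width and height. A worked overlap computation with two 100x100 boxes offset by 50 pixels (made-up coordinates):

import numpy as np

bb = np.array([50., 50., 149., 149.])        # detection, 100x100 px
BBGT = np.array([[100., 100., 199., 199.]])  # ground truth, 100x100 px

ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)      # 50
ih = np.maximum(iymax - iymin + 1., 0.)      # 50
inters = iw * ih                             # 2500
uni = 100. * 100. * 2 - inters               # 17500
print(inters / uni)                          # ~0.143, below the 0.5 ovthresh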
/cls/evaluation_cls.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | sys.path.append(os.path.expanduser('~/caffe-master-0116/python'))  # '~' is not expanded automatically
4 |
5 | import numpy as np
6 | import caffe
7 | import cv2
8 | import datetime
9 |
10 | gpu_mode = True
11 | gpu_id = 0
12 | data_root = os.path.expanduser('~/Database/ILSVRC2012')  # cv2.imread does not expand '~'
13 | val_file = 'ILSVRC2012_val.txt'
14 | save_log = 'log{}.txt'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
15 | model_weights = 'resnet-v2/resnet101_v2.caffemodel'
16 | model_deploy = 'resnet-v2/deploy_resnet101_v2.prototxt'
17 | prob_layer = 'prob'
18 | class_num = 1000
19 | base_size = 256 # short size
20 | crop_size = 224
21 | # mean_value = np.array([128.0, 128.0, 128.0]) # BGR
22 | mean_value = np.array([102.9801, 115.9465, 122.7717]) # BGR
23 | # std = np.array([128.0, 128.0, 128.0]) # BGR
24 | std = np.array([1.0, 1.0, 1.0]) # BGR
25 | crop_num = 1  # 1 (or any other value) = center crop, 12 = mirrored over-sample, 144 = multi-scale crops
26 | batch_size = 1
27 | top_k = (1, 5)
28 |
29 | if gpu_mode:
30 | caffe.set_mode_gpu()
31 | caffe.set_device(gpu_id)
32 | else:
33 | caffe.set_mode_cpu()
34 | net = caffe.Net(model_deploy, model_weights, caffe.TEST)
35 |
36 |
37 | def eval_batch():
38 | eval_images = []
39 | ground_truth = []
40 | f = open(val_file, 'r')
41 | for i in f:
42 | eval_images.append(i.strip().split(' ')[0])
43 | ground_truth.append(int(i.strip().split(' ')[1]))
44 | f.close()
45 |
46 | skip_num = 0
47 | eval_len = len(eval_images)
48 | accuracy = np.zeros(len(top_k))
49 | # eval_len = 100
50 | start_time = datetime.datetime.now()
51 | for i in xrange(eval_len - skip_num):
52 | _img = cv2.imread(data_root + eval_images[i + skip_num])
53 | _img = cv2.resize(_img, (int(_img.shape[1] * base_size / min(_img.shape[:2])),
54 | int(_img.shape[0] * base_size / min(_img.shape[:2])))
55 | )
56 | _img = image_preprocess(_img)
57 |
58 | score_vec = np.zeros(class_num, dtype=np.float32)
59 | crops = []
60 | if crop_num == 1:
61 | crops.append(center_crop(_img))
62 | elif crop_num == 12:
63 | crops.extend(mirror_crop(_img))
64 | elif crop_num == 144:
65 | crops.extend(multi_crop(_img))
66 | else:
67 | crops.append(center_crop(_img))
68 |
69 | iter_num = int(len(crops) / batch_size)
70 | for j in xrange(iter_num):
71 | score_vec += caffe_process(np.asarray(crops, dtype=np.float32)[j*batch_size:(j+1)*batch_size])
72 | score_index = (-score_vec / len(crops)).argsort()
73 |
74 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len - skip_num) + ' ' + str(score_index[0]) + '/' + str(
75 | ground_truth[i + skip_num]),
76 | for j in xrange(len(top_k)):
77 | if ground_truth[i + skip_num] in score_index[:top_k[j]]:
78 | accuracy[j] += 1
79 | tmp_acc = float(accuracy[j]) / float(i + 1)
80 | if top_k[j] == 1:
81 | print '\ttop_' + str(top_k[j]) + ':' + str(tmp_acc),
82 | else:
83 | print 'top_' + str(top_k[j]) + ':' + str(tmp_acc)
84 |
85 | end_time = datetime.datetime.now()
86 | w = open(save_log, 'w')
87 | s1 = 'Evaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time))
88 | s2 = '\nThe model is: {}. \nThe val file is: {}. \n{} images have been tested, crop_num is: {}, base_size is: {}, ' \
89 | 'crop_size is: {}.'.format(model_weights, val_file, str(eval_len), str(crop_num), str(base_size), str(crop_size))
90 | s3 = '\nThe mean value is: ({}, {}, {}).'.format(str(mean_value[0]), str(mean_value[1]), str(mean_value[2]))
91 | s4 = ''
92 | for i in xrange(len(top_k)):
93 | _acc = float(accuracy[i]) / float(eval_len)
94 | s4 += '\nAccuracy of top_{} is: {}; correct num is {}.'.format(str(top_k[i]), str(_acc), str(int(accuracy[i])))
95 | print s1, s2, s3, s4
96 | w.write(s1 + s2 + s3 + s4)
97 | w.close()
98 |
99 |
100 | def image_preprocess(img):
101 | b, g, r = cv2.split(img)
102 | return cv2.merge([(b-mean_value[0])/std[0], (g-mean_value[1])/std[1], (r-mean_value[2])/std[2]])
103 |
104 |
105 | def center_crop(img): # single crop
106 | short_edge = min(img.shape[:2])
107 | if short_edge < crop_size:
108 | return
109 | yy = int((img.shape[0] - crop_size) / 2)
110 | xx = int((img.shape[1] - crop_size) / 2)
111 | return img[yy: yy + crop_size, xx: xx + crop_size]
112 |
113 |
114 | def over_sample(img):  # 6 crops: 4 corners + center + full-image resize
115 | short_edge = min(img.shape[:2])
116 | if short_edge < crop_size:
117 | return
118 | yy = int((img.shape[0] - crop_size) / 2)
119 | xx = int((img.shape[1] - crop_size) / 2)
120 | sample_list = [img[:crop_size, :crop_size], img[-crop_size:, -crop_size:], img[:crop_size, -crop_size:],
121 | img[-crop_size:, :crop_size], img[yy: yy + crop_size, xx: xx + crop_size],
122 | cv2.resize(img, (crop_size, crop_size))]
123 | return sample_list
124 |
125 |
126 | def mirror_crop(img):  # 12 crops: over_sample of the base-size resize and of its mirror
127 | crop_list = []
128 | img_resize = cv2.resize(img, (base_size, base_size))
129 | mirror = img_resize[:, ::-1]
130 | crop_list.extend(over_sample(img_resize))
131 | crop_list.extend(over_sample(mirror))
132 | return crop_list
133 |
134 |
135 | def multi_crop(img):  # 144 crops: 4 scales x 3 positions x 12 over-sampled crops
136 | crop_list = []
137 | size_list = [256, 288, 320, 352] # crop_size: 224
138 | # size_list = [270, 300, 330, 360] # crop_size: 235
139 | # size_list = [320, 352, 384, 416] # crop_size: 299
140 | # size_list = [352, 384, 416, 448] # crop_size: 320
141 | short_edge = min(img.shape[:2])
142 | for i in size_list:
143 | img_resize = cv2.resize(img, (img.shape[1] * i / short_edge, img.shape[0] * i / short_edge))
144 | yy = int((img_resize.shape[0] - i) / 2)
145 | xx = int((img_resize.shape[1] - i) / 2)
146 | for j in xrange(3):
147 | left_center_right = img_resize[yy * j: yy * j + i, xx * j: xx * j + i]
148 | mirror = left_center_right[:, ::-1]
149 | crop_list.extend(over_sample(left_center_right))
150 | crop_list.extend(over_sample(mirror))
151 | return crop_list
152 |
153 |
154 | def caffe_process(_input):
155 | _input = _input.transpose(0, 3, 1, 2)
156 | net.blobs['data'].reshape(*_input.shape)
157 | net.blobs['data'].data[...] = _input
158 | net.forward()
159 |
160 | return np.sum(net.blobs[prob_layer].data, axis=0)
161 |
162 |
163 | if __name__ == '__main__':
164 | eval_batch()
165 |
--------------------------------------------------------------------------------
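Two bits of arithmetic worth making explicit in evaluation_cls.py above: the aspect-preserving resize scales the short side to base_size, and the crop_num settings expand to 1, 12, or 144 crops. A small sanity check with illustrative numbers:

# Short-side resize as in eval_batch(), for a 640x480 input and base_size 256:
h, w, base_size = 480, 640, 256
scale = float(base_size) / min(h, w)
print(int(w * scale), int(h * scale))    # 341 256

# Crop counts: over_sample() yields 6 crops (4 corners + center + full resize),
# mirror_crop() doubles that to 12, multi_crop() is 4 scales x 3 positions x 12:
print(4 * 3 * 12)                        # 144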
/det/faster_rcnn/models/pascal_voc/mobilenet/rpn_rcnn_deploys/rcnn_deploy_faster_voc_mobilenet-dw.prototxt:
--------------------------------------------------------------------------------
1 | input: "conv5_5/sep"
2 | input_shape {
3 | dim: 1
4 | dim: 512
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | name: "roi_pool"
18 | type: "ROIPooling"
19 | bottom: "conv5_5/sep"
20 | bottom: "rois"
21 | top: "roi_pool"
22 | roi_pooling_param {
23 | pooled_w: 14
24 | pooled_h: 14
25 | spatial_scale: 0.062500
26 | }
27 | }
28 |
29 | layer {
30 | name: "conv5_6/dw"
31 | type: "ConvolutionDepthwise"
32 | bottom: "roi_pool"
33 | top: "conv5_6/dw"
34 | param {
35 | lr_mult: 1
36 | decay_mult: 1
37 | }
38 | convolution_param {
39 | num_output: 512
40 | bias_term: false
41 | pad: 1
42 | kernel_size: 3
43 | group: 512
44 | #engine: CAFFE
45 | stride: 2
46 | weight_filler {
47 | type: "msra"
48 | }
49 | }
50 | }
51 | layer {
52 | name: "conv5_6/dw/bn"
53 | type: "BatchNorm"
54 | bottom: "conv5_6/dw"
55 | top: "conv5_6/dw"
56 | param {
57 | lr_mult: 0
58 | decay_mult: 0
59 | }
60 | param {
61 | lr_mult: 0
62 | decay_mult: 0
63 | }
64 | param {
65 | lr_mult: 0
66 | decay_mult: 0
67 | }
68 | }
69 | layer {
70 | name: "conv5_6/dw/scale"
71 | type: "Scale"
72 | bottom: "conv5_6/dw"
73 | top: "conv5_6/dw"
74 | scale_param {
75 | filler {
76 | value: 1
77 | }
78 | bias_term: true
79 | bias_filler {
80 | value: 0
81 | }
82 | }
83 | param {
84 | lr_mult: 0.0
85 | decay_mult: 0.0
86 | }
87 | param {
88 | lr_mult: 0.0
89 | decay_mult: 0.0
90 | }
91 | }
92 | layer {
93 | name: "relu5_6/dw"
94 | type: "ReLU"
95 | bottom: "conv5_6/dw"
96 | top: "conv5_6/dw"
97 | }
98 | layer {
99 | name: "conv5_6/sep"
100 | type: "Convolution"
101 | bottom: "conv5_6/dw"
102 | top: "conv5_6/sep"
103 | param {
104 | lr_mult: 1
105 | decay_mult: 1
106 | }
107 | convolution_param {
108 | num_output: 1024
109 | bias_term: false
110 | pad: 0
111 | kernel_size: 1
112 | stride: 1
113 | weight_filler {
114 | type: "msra"
115 | }
116 | }
117 | }
118 | layer {
119 | name: "conv5_6/sep/bn"
120 | type: "BatchNorm"
121 | bottom: "conv5_6/sep"
122 | top: "conv5_6/sep"
123 | param {
124 | lr_mult: 0
125 | decay_mult: 0
126 | }
127 | param {
128 | lr_mult: 0
129 | decay_mult: 0
130 | }
131 | param {
132 | lr_mult: 0
133 | decay_mult: 0
134 | }
135 | }
136 | layer {
137 | name: "conv5_6/sep/scale"
138 | type: "Scale"
139 | bottom: "conv5_6/sep"
140 | top: "conv5_6/sep"
141 | scale_param {
142 | filler {
143 | value: 1
144 | }
145 | bias_term: true
146 | bias_filler {
147 | value: 0
148 | }
149 | }
150 | param {
151 | lr_mult: 0.0
152 | decay_mult: 0.0
153 | }
154 | param {
155 | lr_mult: 0.0
156 | decay_mult: 0.0
157 | }
158 | }
159 | layer {
160 | name: "relu5_6/sep"
161 | type: "ReLU"
162 | bottom: "conv5_6/sep"
163 | top: "conv5_6/sep"
164 | }
165 | layer {
166 | name: "conv6/dw"
167 | type: "ConvolutionDepthwise"
168 | bottom: "conv5_6/sep"
169 | top: "conv6/dw"
170 | param {
171 | lr_mult: 1
172 | decay_mult: 1
173 | }
174 | convolution_param {
175 | num_output: 1024
176 | bias_term: false
177 | pad: 1
178 | kernel_size: 3
179 | group: 1024
180 | #engine: CAFFE
181 | stride: 1
182 | weight_filler {
183 | type: "msra"
184 | }
185 | }
186 | }
187 | layer {
188 | name: "conv6/dw/bn"
189 | type: "BatchNorm"
190 | bottom: "conv6/dw"
191 | top: "conv6/dw"
192 | param {
193 | lr_mult: 0
194 | decay_mult: 0
195 | }
196 | param {
197 | lr_mult: 0
198 | decay_mult: 0
199 | }
200 | param {
201 | lr_mult: 0
202 | decay_mult: 0
203 | }
204 | }
205 | layer {
206 | name: "conv6/dw/scale"
207 | type: "Scale"
208 | bottom: "conv6/dw"
209 | top: "conv6/dw"
210 | scale_param {
211 | filler {
212 | value: 1
213 | }
214 | bias_term: true
215 | bias_filler {
216 | value: 0
217 | }
218 | }
219 | param {
220 | lr_mult: 0.0
221 | decay_mult: 0.0
222 | }
223 | param {
224 | lr_mult: 0.0
225 | decay_mult: 0.0
226 | }
227 | }
228 | layer {
229 | name: "relu6/dw"
230 | type: "ReLU"
231 | bottom: "conv6/dw"
232 | top: "conv6/dw"
233 | }
234 | layer {
235 | name: "conv6/sep"
236 | type: "Convolution"
237 | bottom: "conv6/dw"
238 | top: "conv6/sep"
239 | param {
240 | lr_mult: 1
241 | decay_mult: 1
242 | }
243 | convolution_param {
244 | num_output: 1024
245 | bias_term: false
246 | pad: 0
247 | kernel_size: 1
248 | stride: 1
249 | weight_filler {
250 | type: "msra"
251 | }
252 | }
253 | }
254 | layer {
255 | name: "conv6/sep/bn"
256 | type: "BatchNorm"
257 | bottom: "conv6/sep"
258 | top: "conv6/sep"
259 | param {
260 | lr_mult: 0
261 | decay_mult: 0
262 | }
263 | param {
264 | lr_mult: 0
265 | decay_mult: 0
266 | }
267 | param {
268 | lr_mult: 0
269 | decay_mult: 0
270 | }
271 | }
272 | layer {
273 | name: "conv6/sep/scale"
274 | type: "Scale"
275 | bottom: "conv6/sep"
276 | top: "conv6/sep"
277 | scale_param {
278 | filler {
279 | value: 1
280 | }
281 | bias_term: true
282 | bias_filler {
283 | value: 0
284 | }
285 | }
286 | param {
287 | lr_mult: 0.0
288 | decay_mult: 0.0
289 | }
290 | param {
291 | lr_mult: 0.0
292 | decay_mult: 0.0
293 | }
294 | }
295 | layer {
296 | name: "relu6/sep"
297 | type: "ReLU"
298 | bottom: "conv6/sep"
299 | top: "conv6/sep"
300 | }
301 | layer {
302 | name: "pool6"
303 | type: "Pooling"
304 | bottom: "conv6/sep"
305 | top: "pool6"
306 | pooling_param {
307 | pool: AVE
308 | global_pooling: true
309 | }
310 | }
311 | layer {
312 | name: "cls_score"
313 | type: "InnerProduct"
314 | bottom: "pool6"
315 | top: "cls_score"
316 | param {
317 | lr_mult: 1
318 | decay_mult: 1
319 | }
320 | param {
321 | lr_mult: 2
322 | decay_mult: 0
323 | }
324 | inner_product_param {
325 | num_output: 21
326 | weight_filler {
327 | type: "msra"
328 | std: 0.01
329 | }
330 | bias_filler {
331 | type: "constant"
332 | value: 0
333 | }
334 | }
335 | }
336 | layer {
337 | name: "bbox_pred"
338 | type: "InnerProduct"
339 | bottom: "pool6"
340 | top: "bbox_pred"
341 | param {
342 | lr_mult: 1
343 | decay_mult: 1
344 | }
345 | param {
346 | lr_mult: 2
347 | decay_mult: 0
348 | }
349 | inner_product_param {
350 | num_output: 84
351 | weight_filler {
352 | type: "msra"
353 | std: 0.01
354 | }
355 | bias_filler {
356 | type: "constant"
357 | value: 0
358 | }
359 | }
360 | }
361 | layer {
362 | name: "cls_prob"
363 | type: "Softmax"
364 | bottom: "cls_score"
365 | top: "cls_prob"
366 | }
367 |
368 |
--------------------------------------------------------------------------------
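Shape walk-through of the MobileNet RCNN head above (a sketch of what Caffe derives at net setup): spatial_scale 0.0625 = 1/16 matches a backbone feature map downsampled 16x, and the stride-2 depthwise convolution halves the 14x14 ROI grid before global pooling.

n_rois, n_classes = 300, 21                    # 20 VOC classes + background
roi_pool = (n_rois, 512, 14, 14)               # ROIPooling: pooled_w = pooled_h = 14
conv5_6 = (n_rois, 1024, 7, 7)                 # 3x3/s2 depthwise, pad 1: (14+2-3)//2+1 = 7
pool6 = (n_rois, 1024, 1, 1)                   # global average pooling
print('cls_score:', (n_rois, n_classes))       # (300, 21)
print('bbox_pred:', (n_rois, n_classes * 4))   # (300, 84), 4 box deltas per class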
/cls/vgg/deploy_vgg16-pytorch.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 |
9 | layer {
10 | name: "conv1_1"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv1_1"
14 | convolution_param {
15 | bias_term: true
16 | num_output: 64
17 | pad: 1
18 | kernel_size: 3
19 | stride: 1
20 | }
21 | }
22 |
23 | layer {
24 | name: "relu1_1"
25 | type: "ReLU"
26 | bottom: "conv1_1"
27 | top: "conv1_1"
28 | }
29 |
30 | layer {
31 | name: "conv1_2"
32 | type: "Convolution"
33 | bottom: "conv1_1"
34 | top: "conv1_2"
35 | convolution_param {
36 | bias_term: true
37 | num_output: 64
38 | pad: 1
39 | kernel_size: 3
40 | stride: 1
41 | }
42 | }
43 |
44 | layer {
45 | name: "relu1_2"
46 | type: "ReLU"
47 | bottom: "conv1_2"
48 | top: "conv1_2"
49 | }
50 |
51 | layer {
52 | name: "pool1"
53 | type: "Pooling"
54 | bottom: "conv1_2"
55 | top: "pool1"
56 | pooling_param {
57 | pool: MAX
58 | kernel_size: 2
59 | stride: 2
60 | }
61 | }
62 |
63 | layer {
64 | name: "conv2_1"
65 | type: "Convolution"
66 | bottom: "pool1"
67 | top: "conv2_1"
68 | convolution_param {
69 | bias_term: true
70 | num_output: 128
71 | pad: 1
72 | kernel_size: 3
73 | stride: 1
74 | }
75 | }
76 |
77 | layer {
78 | name: "relu2_1"
79 | type: "ReLU"
80 | bottom: "conv2_1"
81 | top: "conv2_1"
82 | }
83 |
84 | layer {
85 | name: "conv2_2"
86 | type: "Convolution"
87 | bottom: "conv2_1"
88 | top: "conv2_2"
89 | convolution_param {
90 | bias_term: true
91 | num_output: 128
92 | pad: 1
93 | kernel_size: 3
94 | stride: 1
95 | }
96 | }
97 |
98 | layer {
99 | name: "relu2_2"
100 | type: "ReLU"
101 | bottom: "conv2_2"
102 | top: "conv2_2"
103 | }
104 |
105 | layer {
106 | name: "pool2"
107 | type: "Pooling"
108 | bottom: "conv2_2"
109 | top: "pool2"
110 | pooling_param {
111 | pool: MAX
112 | kernel_size: 2
113 | stride: 2
114 | }
115 | }
116 |
117 | layer {
118 | name: "conv3_1"
119 | type: "Convolution"
120 | bottom: "pool2"
121 | top: "conv3_1"
122 | convolution_param {
123 | bias_term: true
124 | num_output: 256
125 | pad: 1
126 | kernel_size: 3
127 | stride: 1
128 | }
129 | }
130 |
131 | layer {
132 | name: "relu3_1"
133 | type: "ReLU"
134 | bottom: "conv3_1"
135 | top: "conv3_1"
136 | }
137 |
138 | layer {
139 | name: "conv3_2"
140 | type: "Convolution"
141 | bottom: "conv3_1"
142 | top: "conv3_2"
143 | convolution_param {
144 | bias_term: true
145 | num_output: 256
146 | pad: 1
147 | kernel_size: 3
148 | stride: 1
149 | }
150 | }
151 |
152 | layer {
153 | name: "relu3_2"
154 | type: "ReLU"
155 | bottom: "conv3_2"
156 | top: "conv3_2"
157 | }
158 |
159 | layer {
160 | name: "conv3_3"
161 | type: "Convolution"
162 | bottom: "conv3_2"
163 | top: "conv3_3"
164 | convolution_param {
165 | bias_term: true
166 | num_output: 256
167 | pad: 1
168 | kernel_size: 3
169 | stride: 1
170 | }
171 | }
172 |
173 | layer {
174 | name: "relu3_3"
175 | type: "ReLU"
176 | bottom: "conv3_3"
177 | top: "conv3_3"
178 | }
179 |
180 | layer {
181 | name: "pool3"
182 | type: "Pooling"
183 | bottom: "conv3_3"
184 | top: "pool3"
185 | pooling_param {
186 | pool: MAX
187 | kernel_size: 2
188 | stride: 2
189 | }
190 | }
191 |
192 | layer {
193 | name: "conv4_1"
194 | type: "Convolution"
195 | bottom: "pool3"
196 | top: "conv4_1"
197 | convolution_param {
198 | bias_term: true
199 | num_output: 512
200 | pad: 1
201 | kernel_size: 3
202 | stride: 1
203 | }
204 | }
205 |
206 | layer {
207 | name: "relu4_1"
208 | type: "ReLU"
209 | bottom: "conv4_1"
210 | top: "conv4_1"
211 | }
212 |
213 | layer {
214 | name: "conv4_2"
215 | type: "Convolution"
216 | bottom: "conv4_1"
217 | top: "conv4_2"
218 | convolution_param {
219 | bias_term: true
220 | num_output: 512
221 | pad: 1
222 | kernel_size: 3
223 | stride: 1
224 | }
225 | }
226 |
227 | layer {
228 | name: "relu4_2"
229 | type: "ReLU"
230 | bottom: "conv4_2"
231 | top: "conv4_2"
232 | }
233 |
234 | layer {
235 | name: "conv4_3"
236 | type: "Convolution"
237 | bottom: "conv4_2"
238 | top: "conv4_3"
239 | convolution_param {
240 | bias_term: true
241 | num_output: 512
242 | pad: 1
243 | kernel_size: 3
244 | stride: 1
245 | }
246 | }
247 |
248 | layer {
249 | name: "relu4_3"
250 | type: "ReLU"
251 | bottom: "conv4_3"
252 | top: "conv4_3"
253 | }
254 |
255 | layer {
256 | name: "pool4"
257 | type: "Pooling"
258 | bottom: "conv4_3"
259 | top: "pool4"
260 | pooling_param {
261 | pool: MAX
262 | kernel_size: 2
263 | stride: 2
264 | }
265 | }
266 |
267 | layer {
268 | name: "conv5_1"
269 | type: "Convolution"
270 | bottom: "pool4"
271 | top: "conv5_1"
272 | convolution_param {
273 | bias_term: true
274 | num_output: 512
275 | pad: 1
276 | kernel_size: 3
277 | stride: 1
278 | }
279 | }
280 |
281 | layer {
282 | name: "relu5_1"
283 | type: "ReLU"
284 | bottom: "conv5_1"
285 | top: "conv5_1"
286 | }
287 |
288 | layer {
289 | name: "conv5_2"
290 | type: "Convolution"
291 | bottom: "conv5_1"
292 | top: "conv5_2"
293 | convolution_param {
294 | bias_term: true
295 | num_output: 512
296 | pad: 1
297 | kernel_size: 3
298 | stride: 1
299 | }
300 | }
301 |
302 | layer {
303 | name: "relu5_2"
304 | type: "ReLU"
305 | bottom: "conv5_2"
306 | top: "conv5_2"
307 | }
308 |
309 | layer {
310 | name: "conv5_3"
311 | type: "Convolution"
312 | bottom: "conv5_2"
313 | top: "conv5_3"
314 | convolution_param {
315 | bias_term: true
316 | num_output: 512
317 | pad: 1
318 | kernel_size: 3
319 | stride: 1
320 | }
321 | }
322 |
323 | layer {
324 | name: "relu5_3"
325 | type: "ReLU"
326 | bottom: "conv5_3"
327 | top: "conv5_3"
328 | }
329 |
330 | layer {
331 | name: "pool5"
332 | type: "Pooling"
333 | bottom: "conv5_3"
334 | top: "pool5"
335 | pooling_param {
336 | pool: MAX
337 | kernel_size: 2
338 | stride: 2
339 | }
340 | }
341 |
342 | layer {
343 | bottom: "pool5"
344 | top: "fc6"
345 | name: "fc6"
346 | type: "InnerProduct"
347 | inner_product_param {
348 | num_output: 4096
349 | }
350 | }
351 |
352 | layer {
353 | name: "relu6"
354 | type: "ReLU"
355 | bottom: "fc6"
356 | top: "fc6"
357 | }
358 |
359 | layer {
360 | name: "dropout6"
361 | type: "Dropout"
362 | bottom: "fc6"
363 | top: "fc6"
364 | dropout_param {
365 | dropout_ratio: 0.5
366 | }
367 | }
368 |
369 | layer {
370 | bottom: "fc6"
371 | top: "fc7"
372 | name: "fc7"
373 | type: "InnerProduct"
374 | inner_product_param {
375 | num_output: 4096
376 | }
377 | }
378 |
379 | layer {
380 | name: "relu7"
381 | type: "ReLU"
382 | bottom: "fc7"
383 | top: "fc7"
384 | }
385 |
386 | layer {
387 | name: "dropout7"
388 | type: "Dropout"
389 | bottom: "fc7"
390 | top: "fc7"
391 | dropout_param {
392 | dropout_ratio: 0.5
393 | }
394 | }
395 |
396 | layer {
397 | bottom: "fc7"
398 | top: "classifier"
399 | name: "classifier"
400 | type: "InnerProduct"
401 | inner_product_param {
402 | num_output: 1000
403 | }
404 | }
405 |
406 | layer {
407 | name: "prob"
408 | type: "Softmax"
409 | bottom: "classifier"
410 | top: "prob"
411 | }
412 |
--------------------------------------------------------------------------------
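A quick parameter count for the classifier head above (not from the repo, just arithmetic): fc6 flattens the 512x7x7 pool5 blob, so it alone accounts for the bulk of VGG-16's ~138M parameters.

fc6 = 512 * 7 * 7 * 4096 + 4096    # 102,764,544 (weights + biases)
fc7 = 4096 * 4096 + 4096           #  16,781,312
fc8 = 4096 * 1000 + 1000           #   4,097,000 (the "classifier" layer here)
print('{:,}'.format(fc6 + fc7 + fc8))  # 123,642,856 of ~138M total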
/cls/vgg/deploy_vgg16-tf.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 |
9 | layer {
10 | name: "conv1_1"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv1_1"
14 | convolution_param {
15 | bias_term: true
16 | num_output: 64
17 | pad: 1
18 | kernel_size: 3
19 | stride: 1
20 | }
21 | }
22 |
23 | layer {
24 | name: "relu1_1"
25 | type: "ReLU"
26 | bottom: "conv1_1"
27 | top: "conv1_1"
28 | }
29 |
30 | layer {
31 | name: "conv1_2"
32 | type: "Convolution"
33 | bottom: "conv1_1"
34 | top: "conv1_2"
35 | convolution_param {
36 | bias_term: true
37 | num_output: 64
38 | pad: 1
39 | kernel_size: 3
40 | stride: 1
41 | }
42 | }
43 |
44 | layer {
45 | name: "relu1_2"
46 | type: "ReLU"
47 | bottom: "conv1_2"
48 | top: "conv1_2"
49 | }
50 |
51 | layer {
52 | name: "pool1"
53 | type: "Pooling"
54 | bottom: "conv1_2"
55 | top: "pool1"
56 | pooling_param {
57 | pool: MAX
58 | kernel_size: 2
59 | stride: 2
60 | }
61 | }
62 |
63 | layer {
64 | name: "conv2_1"
65 | type: "Convolution"
66 | bottom: "pool1"
67 | top: "conv2_1"
68 | convolution_param {
69 | bias_term: true
70 | num_output: 128
71 | pad: 1
72 | kernel_size: 3
73 | stride: 1
74 | }
75 | }
76 |
77 | layer {
78 | name: "relu2_1"
79 | type: "ReLU"
80 | bottom: "conv2_1"
81 | top: "conv2_1"
82 | }
83 |
84 | layer {
85 | name: "conv2_2"
86 | type: "Convolution"
87 | bottom: "conv2_1"
88 | top: "conv2_2"
89 | convolution_param {
90 | bias_term: true
91 | num_output: 128
92 | pad: 1
93 | kernel_size: 3
94 | stride: 1
95 | }
96 | }
97 |
98 | layer {
99 | name: "relu2_2"
100 | type: "ReLU"
101 | bottom: "conv2_2"
102 | top: "conv2_2"
103 | }
104 |
105 | layer {
106 | name: "pool2"
107 | type: "Pooling"
108 | bottom: "conv2_2"
109 | top: "pool2"
110 | pooling_param {
111 | pool: MAX
112 | kernel_size: 2
113 | stride: 2
114 | }
115 | }
116 |
117 | layer {
118 | name: "conv3_1"
119 | type: "Convolution"
120 | bottom: "pool2"
121 | top: "conv3_1"
122 | convolution_param {
123 | bias_term: true
124 | num_output: 256
125 | pad: 1
126 | kernel_size: 3
127 | stride: 1
128 | }
129 | }
130 |
131 | layer {
132 | name: "relu3_1"
133 | type: "ReLU"
134 | bottom: "conv3_1"
135 | top: "conv3_1"
136 | }
137 |
138 | layer {
139 | name: "conv3_2"
140 | type: "Convolution"
141 | bottom: "conv3_1"
142 | top: "conv3_2"
143 | convolution_param {
144 | bias_term: true
145 | num_output: 256
146 | pad: 1
147 | kernel_size: 3
148 | stride: 1
149 | }
150 | }
151 |
152 | layer {
153 | name: "relu3_2"
154 | type: "ReLU"
155 | bottom: "conv3_2"
156 | top: "conv3_2"
157 | }
158 |
159 | layer {
160 | name: "conv3_3"
161 | type: "Convolution"
162 | bottom: "conv3_2"
163 | top: "conv3_3"
164 | convolution_param {
165 | bias_term: true
166 | num_output: 256
167 | pad: 1
168 | kernel_size: 3
169 | stride: 1
170 | }
171 | }
172 |
173 | layer {
174 | name: "relu3_3"
175 | type: "ReLU"
176 | bottom: "conv3_3"
177 | top: "conv3_3"
178 | }
179 |
180 | layer {
181 | name: "pool3"
182 | type: "Pooling"
183 | bottom: "conv3_3"
184 | top: "pool3"
185 | pooling_param {
186 | pool: MAX
187 | kernel_size: 2
188 | stride: 2
189 | }
190 | }
191 |
192 | layer {
193 | name: "conv4_1"
194 | type: "Convolution"
195 | bottom: "pool3"
196 | top: "conv4_1"
197 | convolution_param {
198 | bias_term: true
199 | num_output: 512
200 | pad: 1
201 | kernel_size: 3
202 | stride: 1
203 | }
204 | }
205 |
206 | layer {
207 | name: "relu4_1"
208 | type: "ReLU"
209 | bottom: "conv4_1"
210 | top: "conv4_1"
211 | }
212 |
213 | layer {
214 | name: "conv4_2"
215 | type: "Convolution"
216 | bottom: "conv4_1"
217 | top: "conv4_2"
218 | convolution_param {
219 | bias_term: true
220 | num_output: 512
221 | pad: 1
222 | kernel_size: 3
223 | stride: 1
224 | }
225 | }
226 |
227 | layer {
228 | name: "relu4_2"
229 | type: "ReLU"
230 | bottom: "conv4_2"
231 | top: "conv4_2"
232 | }
233 |
234 | layer {
235 | name: "conv4_3"
236 | type: "Convolution"
237 | bottom: "conv4_2"
238 | top: "conv4_3"
239 | convolution_param {
240 | bias_term: true
241 | num_output: 512
242 | pad: 1
243 | kernel_size: 3
244 | stride: 1
245 | }
246 | }
247 |
248 | layer {
249 | name: "relu4_3"
250 | type: "ReLU"
251 | bottom: "conv4_3"
252 | top: "conv4_3"
253 | }
254 |
255 | layer {
256 | name: "pool4"
257 | type: "Pooling"
258 | bottom: "conv4_3"
259 | top: "pool4"
260 | pooling_param {
261 | pool: MAX
262 | kernel_size: 2
263 | stride: 2
264 | }
265 | }
266 |
267 | layer {
268 | name: "conv5_1"
269 | type: "Convolution"
270 | bottom: "pool4"
271 | top: "conv5_1"
272 | convolution_param {
273 | bias_term: true
274 | num_output: 512
275 | pad: 1
276 | kernel_size: 3
277 | stride: 1
278 | }
279 | }
280 |
281 | layer {
282 | name: "relu5_1"
283 | type: "ReLU"
284 | bottom: "conv5_1"
285 | top: "conv5_1"
286 | }
287 |
288 | layer {
289 | name: "conv5_2"
290 | type: "Convolution"
291 | bottom: "conv5_1"
292 | top: "conv5_2"
293 | convolution_param {
294 | bias_term: true
295 | num_output: 512
296 | pad: 1
297 | kernel_size: 3
298 | stride: 1
299 | }
300 | }
301 |
302 | layer {
303 | name: "relu5_2"
304 | type: "ReLU"
305 | bottom: "conv5_2"
306 | top: "conv5_2"
307 | }
308 |
309 | layer {
310 | name: "conv5_3"
311 | type: "Convolution"
312 | bottom: "conv5_2"
313 | top: "conv5_3"
314 | convolution_param {
315 | bias_term: true
316 | num_output: 512
317 | pad: 1
318 | kernel_size: 3
319 | stride: 1
320 | }
321 | }
322 |
323 | layer {
324 | name: "relu5_3"
325 | type: "ReLU"
326 | bottom: "conv5_3"
327 | top: "conv5_3"
328 | }
329 |
330 | layer {
331 | name: "pool5"
332 | type: "Pooling"
333 | bottom: "conv5_3"
334 | top: "pool5"
335 | pooling_param {
336 | pool: MAX
337 | kernel_size: 2
338 | stride: 2
339 | }
340 | }
341 |
342 | layer {
343 | name: "fc6"
344 | type: "Convolution"
345 | bottom: "pool5"
346 | top: "fc6"
347 | convolution_param {
348 | bias_term: true
349 | num_output: 4096
350 | kernel_size: 7
351 | stride: 1
352 | }
353 | }
354 |
355 | layer {
356 | name: "relu6"
357 | type: "ReLU"
358 | bottom: "fc6"
359 | top: "fc6"
360 | }
361 |
362 | layer {
363 | name: "dropout6"
364 | type: "Dropout"
365 | bottom: "fc6"
366 | top: "fc6"
367 | dropout_param {
368 | dropout_ratio: 0.5
369 | }
370 | }
371 |
372 | layer {
373 | name: "fc7"
374 | type: "Convolution"
375 | bottom: "fc6"
376 | top: "fc7"
377 | convolution_param {
378 | bias_term: true
379 | num_output: 4096
380 | kernel_size: 1
381 | stride: 1
382 | }
383 | }
384 |
385 | layer {
386 | name: "relu7"
387 | type: "ReLU"
388 | bottom: "fc7"
389 | top: "fc7"
390 | }
391 |
392 | layer {
393 | name: "dropout7"
394 | type: "Dropout"
395 | bottom: "fc7"
396 | top: "fc7"
397 | dropout_param {
398 | dropout_ratio: 0.5
399 | }
400 | }
401 |
402 | layer {
403 | name: "fc8"
404 | type: "Convolution"
405 | bottom: "fc7"
406 | top: "fc8"
407 | convolution_param {
408 | bias_term: true
409 | num_output: 1000
410 | kernel_size: 1
411 | stride: 1
412 | }
413 | }
414 |
415 | layer {
416 | name: "reshape"
417 | type: "Reshape"
418 | bottom: "fc8"
419 | top: "reshape"
420 | reshape_param {
421 | shape {
422 | dim: 0
423 | dim: 0
424 | }
425 | }
426 | }
427 |
428 | layer {
429 | name: "prob"
430 | type: "Softmax"
431 | bottom: "reshape"
432 | top: "prob"
433 | }
--------------------------------------------------------------------------------
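Unlike the -pytorch variant, the -tf deploy above keeps fc6/fc7/fc8 as convolutions (a 7x7 followed by two 1x1), so the network is fully convolutional: with a larger input, fc8 becomes a grid of 1000-way scores. A sketch of the spatial arithmetic, assuming five 2x2/s2 max pools and a 7x7 valid convolution; note the Reshape to (N, 1000) as written only matches the 1x1 case, so larger inputs would need a different tail.

def fc8_map_size(side):
    pool5 = side // 2 ** 5    # five 2x2/s2 max pools: 224 -> 7
    return pool5 - 7 + 1      # 7x7 conv, pad 0, stride 1

print(fc8_map_size(224))  # 1 -> Reshape to (N, 1000), then Softmax
print(fc8_map_size(384))  # 6 -> a 6x6 map of class scores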
/det/faster_rcnn/models/pascal_voc/xception/rpn_rcnn_deploys/rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt:
--------------------------------------------------------------------------------
1 | input: "xception11_elewise"
2 | input_shape {
3 | dim: 1
4 | dim: 728
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | name: "roi_pool"
18 | type: "ROIPooling"
19 | bottom: "xception11_elewise"
20 | bottom: "rois"
21 | top: "roi_pool"
22 | roi_pooling_param {
23 | pooled_w: 8
24 | pooled_h: 8
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | name: "xception12_match_conv"
30 | type: "Convolution"
31 | bottom: "roi_pool"
32 | top: "xception12_match_conv"
33 | param {
34 | lr_mult: 1
35 | decay_mult: 1
36 | }
37 | convolution_param {
38 | bias_term: false
39 | num_output: 1024
40 | pad: 0
41 | kernel_size: 1
42 | stride: 1
43 | }
44 | }
45 | layer {
46 | name: "xception12_match_conv_scale"
47 | type: "Scale"
48 | bottom: "xception12_match_conv"
49 | top: "xception12_match_conv"
50 | scale_param {
51 | bias_term: true
52 | }
53 | param {
54 | lr_mult: 0.0
55 | decay_mult: 0.0
56 | }
57 | param {
58 | lr_mult: 0.0
59 | decay_mult: 0.0
60 | }
61 | }
62 | layer {
63 | name: "xception12_relu"
64 | type: "ReLU"
65 | bottom: "roi_pool"
66 | top: "xception12_relu"
67 | }
68 | layer {
69 | name: "xception12_conv1_1"
70 | type: "ConvolutionDepthwise"
71 | bottom: "xception12_relu"
72 | top: "xception12_conv1_1"
73 | param {
74 | lr_mult: 1
75 | decay_mult: 1
76 | }
77 | convolution_param {
78 | bias_term: false
79 | num_output: 728
80 | group: 728
81 | pad: 1
82 | kernel_size: 3
83 | stride: 1
84 | }
85 | }
86 | layer {
87 | name: "xception12_conv1_2"
88 | type: "Convolution"
89 | bottom: "xception12_conv1_1"
90 | top: "xception12_conv1_2"
91 | param {
92 | lr_mult: 1
93 | decay_mult: 1
94 | }
95 | convolution_param {
96 | bias_term: false
97 | num_output: 728
98 | pad: 0
99 | kernel_size: 1
100 | stride: 1
101 | }
102 | }
103 | layer {
104 | name: "xception12_conv1_scale"
105 | type: "Scale"
106 | bottom: "xception12_conv1_2"
107 | top: "xception12_conv1_2"
108 | scale_param {
109 | bias_term: true
110 | }
111 | param {
112 | lr_mult: 0.0
113 | decay_mult: 0.0
114 | }
115 | param {
116 | lr_mult: 0.0
117 | decay_mult: 0.0
118 | }
119 | }
120 | layer {
121 | name: "xception12_conv1_relu"
122 | type: "ReLU"
123 | bottom: "xception12_conv1_2"
124 | top: "xception12_conv1_2"
125 | }
126 | layer {
127 | name: "xception12_conv2_1"
128 | type: "ConvolutionDepthwise"
129 | bottom: "xception12_conv1_2"
130 | top: "xception12_conv2_1"
131 | param {
132 | lr_mult: 1
133 | decay_mult: 1
134 | }
135 | convolution_param {
136 | bias_term: false
137 | num_output: 728
138 | group: 728
139 | pad: 1
140 | kernel_size: 3
141 | stride: 1
142 | }
143 | }
144 | layer {
145 | name: "xception12_conv2_2"
146 | type: "Convolution"
147 | bottom: "xception12_conv2_1"
148 | top: "xception12_conv2_2"
149 | param {
150 | lr_mult: 1
151 | decay_mult: 1
152 | }
153 | convolution_param {
154 | bias_term: false
155 | num_output: 1024
156 | pad: 0
157 | kernel_size: 1
158 | stride: 1
159 | }
160 | }
161 | layer {
162 | name: "xception12_conv2_scale"
163 | type: "Scale"
164 | bottom: "xception12_conv2_2"
165 | top: "xception12_conv2_2"
166 | scale_param {
167 | bias_term: true
168 | }
169 | param {
170 | lr_mult: 0.0
171 | decay_mult: 0.0
172 | }
173 | param {
174 | lr_mult: 0.0
175 | decay_mult: 0.0
176 | }
177 | }
178 | layer {
179 | name: "xception12_pool"
180 | type: "Pooling"
181 | bottom: "xception12_conv2_2"
182 | top: "xception12_pool"
183 | pooling_param {
184 | pool: MAX
185 | kernel_size: 3
186 | stride: 1
187 | pad: 1
188 | ceil_mode: false
189 | }
190 | }
191 | layer {
192 | name: "xception12_elewise"
193 | type: "Eltwise"
194 | bottom: "xception12_match_conv"
195 | bottom: "xception12_pool"
196 | top: "xception12_elewise"
197 | eltwise_param {
198 | operation: SUM
199 | }
200 | }
201 | layer {
202 | name: "conv3_1"
203 | type: "ConvolutionDepthwise"
204 | bottom: "xception12_elewise"
205 | top: "conv3_1"
206 | param {
207 | lr_mult: 1
208 | decay_mult: 1
209 | }
210 | convolution_param {
211 | bias_term: false
212 | num_output: 1024
213 | group: 1024
214 | pad: 1
215 | kernel_size: 3
216 | stride: 1
217 | }
218 | }
219 | layer {
220 | name: "conv3_2"
221 | type: "Convolution"
222 | bottom: "conv3_1"
223 | top: "conv3_2"
224 | param {
225 | lr_mult: 1
226 | decay_mult: 1
227 | }
228 | convolution_param {
229 | bias_term: false
230 | num_output: 1536
231 | pad: 0
232 | kernel_size: 1
233 | stride: 1
234 | }
235 | }
236 | layer {
237 | name: "conv3_scale"
238 | type: "Scale"
239 | bottom: "conv3_2"
240 | top: "conv3_2"
241 | scale_param {
242 | bias_term: true
243 | }
244 | param {
245 | lr_mult: 0.0
246 | decay_mult: 0.0
247 | }
248 | param {
249 | lr_mult: 0.0
250 | decay_mult: 0.0
251 | }
252 | }
253 | layer {
254 | name: "conv3_relu"
255 | type: "ReLU"
256 | bottom: "conv3_2"
257 | top: "conv3_2"
258 | }
259 | layer {
260 | name: "conv4_1"
261 | type: "ConvolutionDepthwise"
262 | bottom: "conv3_2"
263 | top: "conv4_1"
264 | param {
265 | lr_mult: 1
266 | decay_mult: 1
267 | }
268 | convolution_param {
269 | bias_term: false
270 | num_output: 1536
271 | group: 1536
272 | pad: 1
273 | kernel_size: 3
274 | stride: 1
275 | }
276 | }
277 | layer {
278 | name: "conv4_2"
279 | type: "Convolution"
280 | bottom: "conv4_1"
281 | top: "conv4_2"
282 | param {
283 | lr_mult: 1
284 | decay_mult: 1
285 | }
286 | convolution_param {
287 | bias_term: false
288 | num_output: 2048
289 | pad: 0
290 | kernel_size: 1
291 | stride: 1
292 | }
293 | }
294 | layer {
295 | name: "conv4_scale"
296 | type: "Scale"
297 | bottom: "conv4_2"
298 | top: "conv4_2"
299 | scale_param {
300 | bias_term: true
301 | }
302 | param {
303 | lr_mult: 0.0
304 | decay_mult: 0.0
305 | }
306 | param {
307 | lr_mult: 0.0
308 | decay_mult: 0.0
309 | }
310 | }
311 | layer {
312 | name: "conv4_relu"
313 | type: "ReLU"
314 | bottom: "conv4_2"
315 | top: "conv4_2"
316 | }
317 | layer {
318 | name: "pool_ave"
319 | type: "Pooling"
320 | bottom: "conv4_2"
321 | top: "pool_ave"
322 | pooling_param {
323 | global_pooling : true
324 | pool: AVE
325 | }
326 | }
327 | layer {
328 | name: "cls_score"
329 | type: "InnerProduct"
330 | bottom: "pool_ave"
331 | top: "cls_score"
332 | param {
333 | lr_mult: 1
334 | decay_mult: 1
335 | }
336 | param {
337 | lr_mult: 2
338 | decay_mult: 0
339 | }
340 | inner_product_param {
341 | num_output: 21
342 | weight_filler {
343 | type: "msra"
344 | std: 0.01
345 | }
346 | bias_filler {
347 | type: "constant"
348 | value: 0
349 | }
350 | }
351 | }
352 | layer {
353 | name: "bbox_pred"
354 | type: "InnerProduct"
355 | bottom: "pool_ave"
356 | top: "bbox_pred"
357 | param {
358 | lr_mult: 1
359 | decay_mult: 1
360 | }
361 | param {
362 | lr_mult: 2
363 | decay_mult: 0
364 | }
365 | inner_product_param {
366 | num_output: 84
367 | weight_filler {
368 | type: "msra"
369 | std: 0.01
370 | }
371 | bias_filler {
372 | type: "constant"
373 | value: 0
374 | }
375 | }
376 | }
377 | layer {
378 | name: "cls_prob"
379 | type: "Softmax"
380 | bottom: "cls_score"
381 | top: "cls_prob"
382 | }
383 |
384 |
385 |
--------------------------------------------------------------------------------
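The ConvolutionDepthwise + 1x1 pairs in the Xception head above are the usual depthwise-separable factorization. A weight-count comparison for the xception12_conv1 pair versus a dense 3x3 convolution at the same width (bias_term is false in the prototxt, so weights only):

c = 728
depthwise = c * 3 * 3        # 6,552 (group = num_output = 728)
pointwise = c * c            # 529,984 (1x1 projection)
dense = c * c * 3 * 3        # 4,769,856
print(depthwise + pointwise) # 536,536
print(dense / float(depthwise + pointwise))  # ~8.9x fewer parameters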
/cls/vgg/deploy_vgg19-pytorch.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 |
9 | layer {
10 | name: "conv1_1"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv1_1"
14 | convolution_param {
15 | bias_term: true
16 | num_output: 64
17 | pad: 1
18 | kernel_size: 3
19 | stride: 1
20 | }
21 | }
22 | layer {
23 | name: "relu1_1"
24 | type: "ReLU"
25 | bottom: "conv1_1"
26 | top: "conv1_1"
27 | }
28 |
29 | layer {
30 | name: "conv1_2"
31 | type: "Convolution"
32 | bottom: "conv1_1"
33 | top: "conv1_2"
34 | convolution_param {
35 | bias_term: true
36 | num_output: 64
37 | pad: 1
38 | kernel_size: 3
39 | stride: 1
40 | }
41 | }
42 | layer {
43 | name: "relu1_2"
44 | type: "ReLU"
45 | bottom: "conv1_2"
46 | top: "conv1_2"
47 | }
48 |
49 | layer {
50 | name: "pool1"
51 | type: "Pooling"
52 | bottom: "conv1_2"
53 | top: "pool1"
54 | pooling_param {
55 | pool: MAX
56 | kernel_size: 2
57 | stride: 2
58 | }
59 | }
60 |
61 | layer {
62 | name: "conv2_1"
63 | type: "Convolution"
64 | bottom: "pool1"
65 | top: "conv2_1"
66 | convolution_param {
67 | bias_term: true
68 | num_output: 128
69 | pad: 1
70 | kernel_size: 3
71 | stride: 1
72 | }
73 | }
74 | layer {
75 | name: "relu2_1"
76 | type: "ReLU"
77 | bottom: "conv2_1"
78 | top: "conv2_1"
79 | }
80 |
81 | layer {
82 | name: "conv2_2"
83 | type: "Convolution"
84 | bottom: "conv2_1"
85 | top: "conv2_2"
86 | convolution_param {
87 | bias_term: true
88 | num_output: 128
89 | pad: 1
90 | kernel_size: 3
91 | stride: 1
92 | }
93 | }
94 | layer {
95 | name: "relu2_2"
96 | type: "ReLU"
97 | bottom: "conv2_2"
98 | top: "conv2_2"
99 | }
100 |
101 | layer {
102 | name: "pool2"
103 | type: "Pooling"
104 | bottom: "conv2_2"
105 | top: "pool2"
106 | pooling_param {
107 | pool: MAX
108 | kernel_size: 2
109 | stride: 2
110 | }
111 | }
112 |
113 | layer {
114 | name: "conv3_1"
115 | type: "Convolution"
116 | bottom: "pool2"
117 | top: "conv3_1"
118 | convolution_param {
119 | bias_term: true
120 | num_output: 256
121 | pad: 1
122 | kernel_size: 3
123 | stride: 1
124 | }
125 | }
126 | layer {
127 | name: "relu3_1"
128 | type: "ReLU"
129 | bottom: "conv3_1"
130 | top: "conv3_1"
131 | }
132 |
133 | layer {
134 | name: "conv3_2"
135 | type: "Convolution"
136 | bottom: "conv3_1"
137 | top: "conv3_2"
138 | convolution_param {
139 | bias_term: true
140 | num_output: 256
141 | pad: 1
142 | kernel_size: 3
143 | stride: 1
144 | }
145 | }
146 | layer {
147 | name: "relu3_2"
148 | type: "ReLU"
149 | bottom: "conv3_2"
150 | top: "conv3_2"
151 | }
152 |
153 | layer {
154 | name: "conv3_3"
155 | type: "Convolution"
156 | bottom: "conv3_2"
157 | top: "conv3_3"
158 | convolution_param {
159 | bias_term: true
160 | num_output: 256
161 | pad: 1
162 | kernel_size: 3
163 | stride: 1
164 | }
165 | }
166 | layer {
167 | name: "relu3_3"
168 | type: "ReLU"
169 | bottom: "conv3_3"
170 | top: "conv3_3"
171 | }
172 | layer {
173 | name: "conv3_4"
174 | type: "Convolution"
175 | bottom: "conv3_3"
176 | top: "conv3_4"
177 | convolution_param {
178 | bias_term: true
179 | num_output: 256
180 | pad: 1
181 | kernel_size: 3
182 | stride: 1
183 | }
184 | }
185 | layer {
186 | name: "relu3_4"
187 | type: "ReLU"
188 | bottom: "conv3_4"
189 | top: "conv3_4"
190 | }
191 |
192 | layer {
193 | name: "pool3"
194 | type: "Pooling"
195 | bottom: "conv3_4"
196 | top: "pool3"
197 | pooling_param {
198 | pool: MAX
199 | kernel_size: 2
200 | stride: 2
201 | }
202 | }
203 |
204 | layer {
205 | name: "conv4_1"
206 | type: "Convolution"
207 | bottom: "pool3"
208 | top: "conv4_1"
209 | convolution_param {
210 | bias_term: true
211 | num_output: 512
212 | pad: 1
213 | kernel_size: 3
214 | stride: 1
215 | }
216 | }
217 | layer {
218 | name: "relu4_1"
219 | type: "ReLU"
220 | bottom: "conv4_1"
221 | top: "conv4_1"
222 | }
223 |
224 | layer {
225 | name: "conv4_2"
226 | type: "Convolution"
227 | bottom: "conv4_1"
228 | top: "conv4_2"
229 | convolution_param {
230 | bias_term: true
231 | num_output: 512
232 | pad: 1
233 | kernel_size: 3
234 | stride: 1
235 | }
236 | }
237 | layer {
238 | name: "relu4_2"
239 | type: "ReLU"
240 | bottom: "conv4_2"
241 | top: "conv4_2"
242 | }
243 |
244 | layer {
245 | name: "conv4_3"
246 | type: "Convolution"
247 | bottom: "conv4_2"
248 | top: "conv4_3"
249 | convolution_param {
250 | bias_term: true
251 | num_output: 512
252 | pad: 1
253 | kernel_size: 3
254 | stride: 1
255 | }
256 | }
257 | layer {
258 | name: "relu4_3"
259 | type: "ReLU"
260 | bottom: "conv4_3"
261 | top: "conv4_3"
262 | }
263 |
264 | layer {
265 | name: "conv4_4"
266 | type: "Convolution"
267 | bottom: "conv4_3"
268 | top: "conv4_4"
269 | convolution_param {
270 | bias_term: true
271 | num_output: 512
272 | pad: 1
273 | kernel_size: 3
274 | stride: 1
275 | }
276 | }
277 | layer {
278 | name: "relu4_4"
279 | type: "ReLU"
280 | bottom: "conv4_4"
281 | top: "conv4_4"
282 | }
283 |
284 | layer {
285 | name: "pool4"
286 | type: "Pooling"
287 | bottom: "conv4_4"
288 | top: "pool4"
289 | pooling_param {
290 | pool: MAX
291 | kernel_size: 2
292 | stride: 2
293 | }
294 | }
295 |
296 | layer {
297 | name: "conv5_1"
298 | type: "Convolution"
299 | bottom: "pool4"
300 | top: "conv5_1"
301 | convolution_param {
302 | bias_term: true
303 | num_output: 512
304 | pad: 1
305 | kernel_size: 3
306 | stride: 1
307 | }
308 | }
309 | layer {
310 | name: "relu5_1"
311 | type: "ReLU"
312 | bottom: "conv5_1"
313 | top: "conv5_1"
314 | }
315 |
316 | layer {
317 | name: "conv5_2"
318 | type: "Convolution"
319 | bottom: "conv5_1"
320 | top: "conv5_2"
321 | convolution_param {
322 | bias_term: true
323 | num_output: 512
324 | pad: 1
325 | kernel_size: 3
326 | stride: 1
327 | }
328 | }
329 | layer {
330 | name: "relu5_2"
331 | type: "ReLU"
332 | bottom: "conv5_2"
333 | top: "conv5_2"
334 | }
335 |
336 | layer {
337 | name: "conv5_3"
338 | type: "Convolution"
339 | bottom: "conv5_2"
340 | top: "conv5_3"
341 | convolution_param {
342 | bias_term: true
343 | num_output: 512
344 | pad: 1
345 | kernel_size: 3
346 | stride: 1
347 | }
348 | }
349 | layer {
350 | name: "relu5_3"
351 | type: "ReLU"
352 | bottom: "conv5_3"
353 | top: "conv5_3"
354 | }
355 |
356 | layer {
357 | name: "conv5_4"
358 | type: "Convolution"
359 | bottom: "conv5_3"
360 | top: "conv5_4"
361 | convolution_param {
362 | bias_term: true
363 | num_output: 512
364 | pad: 1
365 | kernel_size: 3
366 | stride: 1
367 | }
368 | }
369 | layer {
370 | name: "relu5_4"
371 | type: "ReLU"
372 | bottom: "conv5_4"
373 | top: "conv5_4"
374 | }
375 |
376 | layer {
377 | name: "pool5"
378 | type: "Pooling"
379 | bottom: "conv5_4"
380 | top: "pool5"
381 | pooling_param {
382 | pool: MAX
383 | kernel_size: 2
384 | stride: 2
385 | }
386 | }
387 |
388 | layer {
389 | bottom: "pool5"
390 | top: "fc6"
391 | name: "fc6"
392 | type: "InnerProduct"
393 | inner_product_param {
394 | num_output: 4096
395 | }
396 | }
397 |
398 | layer {
399 | name: "relu6"
400 | type: "ReLU"
401 | bottom: "fc6"
402 | top: "fc6"
403 | }
404 |
405 | layer {
406 | name: "dropout6"
407 | type: "Dropout"
408 | bottom: "fc6"
409 | top: "fc6"
410 | dropout_param {
411 | dropout_ratio: 0.5
412 | }
413 | }
414 |
415 | layer {
416 | bottom: "fc6"
417 | top: "fc7"
418 | name: "fc7"
419 | type: "InnerProduct"
420 | inner_product_param {
421 | num_output: 4096
422 | }
423 | }
424 |
425 | layer {
426 | name: "relu7"
427 | type: "ReLU"
428 | bottom: "fc7"
429 | top: "fc7"
430 | }
431 |
432 | layer {
433 | name: "dropout7"
434 | type: "Dropout"
435 | bottom: "fc7"
436 | top: "fc7"
437 | dropout_param {
438 | dropout_ratio: 0.5
439 | }
440 | }
441 |
442 | layer {
443 | bottom: "fc7"
444 | top: "classifier"
445 | name: "classifier"
446 | type: "InnerProduct"
447 | inner_product_param {
448 | num_output: 1000
449 | }
450 | }
451 |
452 | layer {
453 | name: "prob"
454 | type: "Softmax"
455 | bottom: "classifier"
456 | top: "prob"
457 | }
458 |
--------------------------------------------------------------------------------
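Depth check for the prototxt above: VGG-19 adds one extra 3x3 convolution in each of stages 3-5 (conv3_4, conv4_4, conv5_4) relative to VGG-16, giving 16 convolutions plus 3 fully connected layers.

convs_per_stage = [2, 2, 4, 4, 4]   # vs [2, 2, 3, 3, 3] in VGG-16
print(sum(convs_per_stage) + 3)     # 19 weight layers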
/seg/pspnet/tools/image_seg_data.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import sys
3 |
4 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python')
5 |
6 | import caffe
7 |
8 | import numpy as np
9 |
10 |
11 | class ImageSegDataLayer(caffe.Layer):
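    """Caffe Python data layer for semantic segmentation.

    Reads "image label" path pairs from `source`, applies random
    color/blur/mirror/scale/rotation augmentation plus pad-and-crop,
    and outputs (data, label) blobs of size crop_size x crop_size.
    """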
12 | def get_gpu_id(self, gpu_id=0):
13 | self.gpu_id = gpu_id
14 | if self.shuffle:
15 | np.random.seed(self.gpu_id)
16 | np.random.shuffle(self.indices)
17 |
18 | def setup(self, bottom, top):
19 | print self.param_str
20 | params = eval(self.param_str)
21 |
22 | self.color_factor = np.array(params.get('color_factor', (0.95, 1.05))) # (0.95, 1.05)
23 | self.contrast_factor = np.array(params.get('contrast_factor', (0.95, 1.05))) # (0.95, 1.05)
24 | self.brightness_factor = np.array(params.get('brightness_factor', (0.95, 1.05))) # (0.95, 1.05)
25 | self.mirror = params.get('mirror', True)
26 | self.gaussian_blur = params.get('gaussian_blur', True)
27 | self.scale_factor = np.array(params.get('scale_factor', (0.75, 2.0))) # (0.75, 2.0)
28 | self.rotation_factor = np.array(params.get('rotation_factor', (-10, 10))) # (-10, 10)
29 |
30 | self.crop_size = int(params.get('crop_size', 513))
31 | self.ignore_label = int(params.get('ignore_label', 255))
32 | self.mean = np.array(params.get('mean', (102.98, 115.947, 122.772)), dtype=np.float32)
33 | self.scale = float(params.get('scale', 1.0))
34 |
35 | self.root_dir = params['root_dir']
36 | self.source = params['source']
37 | self.batch_size = int(params.get('batch_size', 1))
38 | self.shuffle = params.get('shuffle', True)
39 |
40 | if len(top) != 2:
41 | raise Exception("Need to define two tops: data and label.")
42 | if len(bottom) != 0:
43 | raise Exception("Do not define a bottom.") # data layers have no bottoms
44 | if len(self.color_factor) != 2:
45 | raise Exception("'color_factor' must have 2 values for factor range.")
46 | if len(self.contrast_factor) != 2:
47 | raise Exception("'contrast_factor' must have 2 values for factor range.")
48 | if len(self.brightness_factor) != 2:
49 | raise Exception("'brightness_factor' must have 2 values for factor range.")
50 | if len(self.mean) != 3:
51 | raise Exception("'mean' must have 3 values for B G R.")
52 | if len(self.scale_factor) != 2:
53 | raise Exception("'scale_factor' must have 2 values for factor range.")
54 | if self.crop_size <= 0:
55 |             raise Exception("Need positive 'crop_size'.")
56 |
57 | self.indices = open(self.source, 'r').read().splitlines()
58 | self.epoch_num = len(self.indices)
59 | self.idx = 0
60 |
61 | def reshape(self, bottom, top):
62 | top[0].reshape(self.batch_size, 3, self.crop_size, self.crop_size) # for images
63 | top[1].reshape(self.batch_size, 1, self.crop_size, self.crop_size) # for labels
64 |
65 | def forward(self, bottom, top):
66 | batch_img = []
67 | batch_label = []
68 | for _ in xrange(self.batch_size):
69 | _img = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[0]))
70 | _label = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[1]), 0)
71 |
72 | if _img.shape[:2] != _label.shape:
73 |                 raise Exception("Image and label must have the same height and width.")
74 |
75 | aug_img, aug_label = self.augmentation(_img, _label)
76 | batch_img.append(aug_img.transpose((2, 0, 1)))
77 | batch_label.append([aug_label])
78 |
79 | self.idx += 1
80 | if self.idx == self.epoch_num:
81 | self.idx = 0
82 | if self.shuffle:
83 | np.random.seed(self.gpu_id)
84 | np.random.shuffle(self.indices)
85 | batch_img = np.asarray(batch_img)
86 | batch_label = np.asarray(batch_label)
87 |
88 | top[0].data[...] = batch_img
89 | top[1].data[...] = batch_label
90 |
91 | def backward(self, top, propagate_down, bottom):
92 | pass
93 |
94 | def augmentation(self, img, label):
95 | ori_h, ori_w = img.shape[:2]
96 |
97 | _color = 1.0
98 | _contrast = 1.0
99 | _brightness = 1.0
100 |
101 | if self.color_factor[0] != 0 and self.color_factor[1] != 0 and self.color_factor[0] < self.color_factor[1]:
102 | _color = np.random.randint(int(self.color_factor[0] * 100),
103 | int(self.color_factor[1] * 100)) / 100.0
104 |
105 | if self.contrast_factor[0] != 0 and self.contrast_factor[1] != 0 and self.contrast_factor[0] < \
106 | self.contrast_factor[1]:
107 | _contrast = np.random.randint(int(self.contrast_factor[0] * 100),
108 | int(self.contrast_factor[1] * 100)) / 100.0
109 |
110 | if self.brightness_factor[0] != 0 and self.brightness_factor[1] != 0 and self.brightness_factor[0] < \
111 | self.brightness_factor[1]:
112 | _brightness = np.random.randint(int(self.brightness_factor[0] * 100),
113 | int(self.brightness_factor[1] * 100)) / 100.0
114 |
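        # Scale the H, S and V channels by the sampled color/contrast/brightness
        # factors, clip at 255, then convert back to BGR.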
115 | _HSV = np.dot(cv2.cvtColor(img, cv2.COLOR_BGR2HSV).reshape((-1, 3)),
116 | np.array([[_color, 0, 0], [0, _contrast, 0], [0, 0, _brightness]]))
117 | _HSV_H = np.where(_HSV < 255, _HSV, 255)
118 | img = cv2.cvtColor(np.uint8(_HSV_H.reshape((-1, img.shape[1], 3))), cv2.COLOR_HSV2BGR)
119 |
120 | if self.gaussian_blur:
121 | if not np.random.randint(0, 4):
122 | img = cv2.GaussianBlur(img, (3, 3), 0)
123 |
124 | img = np.asarray(img, dtype=np.float32)
125 | label = np.asarray(label, dtype=np.uint8)
126 |
127 | if self.mirror:
128 | if np.random.randint(0, 2):
129 | img = img[:, :: -1]
130 | label = label[:, :: -1]
131 |
132 | if self.scale_factor[0] != 0 and self.scale_factor[1] != 0 and self.scale_factor[0] < self.scale_factor[1]:
133 | _scale = np.random.randint(int(self.scale_factor[0] * 100),
134 | int(self.scale_factor[1] * 100)) / 100.0
135 | res_w = int(_scale * ori_w)
136 | res_h = int(_scale * ori_h)
137 | img = cv2.resize(img, (res_w, res_h))
138 | label = cv2.resize(label, (res_w, res_h), interpolation=cv2.cv.CV_INTER_NN)
139 |
140 | if self.rotation_factor[0] != 0 and self.rotation_factor[1] != 0 and self.rotation_factor[0] < \
141 | self.rotation_factor[1]:
142 | if np.random.randint(0, 2):
143 | _rotation = np.random.randint(int(self.rotation_factor[0] * 100),
144 | int(self.rotation_factor[1] * 100)) / 100.0
145 | tmp_h, tmp_w = img.shape[:2]
146 | rotate_mat = cv2.getRotationMatrix2D((tmp_w / 2, tmp_h / 2), _rotation,
147 | 1)
148 | img = cv2.warpAffine(img, rotate_mat, (tmp_w, tmp_h),
149 | borderValue=cv2.cv.Scalar(self.mean[0], self.mean[1], self.mean[2]))
150 | label = cv2.warpAffine(label, rotate_mat, (tmp_w, tmp_h), flags=cv2.cv.CV_INTER_NN,
151 | borderValue=cv2.cv.Scalar(self.ignore_label))
152 |
153 | # perform random crop
154 | pad_h = max(self.crop_size - img.shape[0], 0)
155 | pad_w = max(self.crop_size - img.shape[1], 0)
156 | pad_img = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT,
157 | value=cv2.cv.Scalar(self.mean[0], self.mean[1], self.mean[2]))
158 | pad_label = cv2.copyMakeBorder(label, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT,
159 | value=cv2.cv.Scalar(self.ignore_label))
160 | off_h = np.random.randint(0, pad_img.shape[0] - self.crop_size + 1)
161 | off_w = np.random.randint(0, pad_img.shape[1] - self.crop_size + 1)
162 | aug_img = pad_img[off_h:off_h + self.crop_size, off_w:off_w + self.crop_size, :]
163 | aug_label = pad_label[off_h:off_h + self.crop_size, off_w:off_w + self.crop_size]
164 |
165 | # perform (x-mean)*scale
166 | aug_img -= self.mean
167 | aug_img *= self.scale
168 |
169 | return aug_img, aug_label
170 |
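
A minimal sketch of how this layer might be attached to a training net with pycaffe's NetSpec; the param_str keys mirror the ones parsed in setup() above, while the dataset paths are placeholders:

```
import caffe
from caffe import layers as L

def seg_data_net():
    # Placeholder paths; the param_str keys match those read in ImageSegDataLayer.setup().
    params = dict(root_dir='/data/VOC2012/',         # hypothetical dataset root
                  source='/data/VOC2012/train.txt',  # "image label" pairs, one per line
                  batch_size=1, crop_size=513, ignore_label=255,
                  mean=(102.98, 115.947, 122.772), scale=1.0, shuffle=True)
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='image_seg_data',
                               layer='ImageSegDataLayer',
                               ntop=2, param_str=str(params))
    return n.to_proto()

print(seg_data_net())
```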
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/resnet38a/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet38a-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "res15_eletwise"
2 | input_shape {
3 | dim: 1
4 | dim: 1024
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | name: "roi_pool"
18 | type: "ROIPooling"
19 | bottom: "res15_eletwise"
20 | bottom: "rois"
21 | top: "roi_pool"
22 | roi_pooling_param {
23 | pooled_w: 14
24 | pooled_h: 14
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | bottom: "roi_pool"
30 | top: "pool5"
31 | name: "pool5"
32 | type: "Pooling"
33 | pooling_param {
34 | kernel_size: 3
35 | pad: 0
36 | stride: 2
37 | pool: MAX
38 | }
39 | }
40 | layer {
41 | bottom: "pool5"
42 | top: "res16_scale"
43 | name: "res16_scale"
44 | type: "Scale"
45 | scale_param {
46 | bias_term: true
47 | }
48 | param {
49 | lr_mult: 0.0
50 | decay_mult: 0.0
51 | }
52 | param {
53 | lr_mult: 0.0
54 | decay_mult: 0.0
55 | }
56 | }
57 | layer {
58 | bottom: "res16_scale"
59 | top: "res16_scale"
60 | name: "res16_relu"
61 | type: "ReLU"
62 | }
63 | layer {
64 | bottom: "res16_scale"
65 | top: "res16_match_conv"
66 | name: "res16_match_conv"
67 | type: "Convolution"
68 | param {
69 | lr_mult: 1
70 | decay_mult: 1
71 | }
72 | convolution_param {
73 | num_output: 2048
74 | kernel_size: 1
75 | pad: 0
76 | stride: 1
77 | bias_term: false
78 | }
79 | }
80 | layer {
81 | bottom: "res16_scale"
82 | top: "res16_conv1"
83 | name: "res16_conv1"
84 | type: "Convolution"
85 | param {
86 | lr_mult: 1
87 | decay_mult: 1
88 | }
89 | convolution_param {
90 | num_output: 512
91 | kernel_size: 1
92 | pad: 0
93 | stride: 1
94 | bias_term: false
95 | }
96 | }
97 | layer {
98 | bottom: "res16_conv1"
99 | top: "res16_conv1"
100 | name: "res16_conv1_scale"
101 | type: "Scale"
102 | scale_param {
103 | bias_term: true
104 | }
105 | param {
106 | lr_mult: 0.0
107 | decay_mult: 0.0
108 | }
109 | param {
110 | lr_mult: 0.0
111 | decay_mult: 0.0
112 | }
113 | }
114 | layer {
115 | bottom: "res16_conv1"
116 | top: "res16_conv1"
117 | name: "res16_conv1_relu"
118 | type: "ReLU"
119 | }
120 | layer {
121 | bottom: "res16_conv1"
122 | top: "res16_conv2"
123 | name: "res16_conv2"
124 | type: "Convolution"
125 | param {
126 | lr_mult: 1
127 | decay_mult: 1
128 | }
129 | convolution_param {
130 | num_output: 1024
131 | kernel_size: 3
132 | pad: 1
133 | stride: 1
134 | bias_term: false
135 | }
136 | }
137 | layer {
138 | bottom: "res16_conv2"
139 | top: "res16_conv2"
140 | name: "res16_conv2_scale"
141 | type: "Scale"
142 | scale_param {
143 | bias_term: true
144 | }
145 | param {
146 | lr_mult: 0.0
147 | decay_mult: 0.0
148 | }
149 | param {
150 | lr_mult: 0.0
151 | decay_mult: 0.0
152 | }
153 | }
154 | layer {
155 | bottom: "res16_conv2"
156 | top: "res16_conv2"
157 | name: "res16_conv2_relu"
158 | type: "ReLU"
159 | }
160 | layer {
161 | bottom: "res16_conv2"
162 | top: "res16_conv3"
163 | name: "res16_conv3"
164 | type: "Convolution"
165 | param {
166 | lr_mult: 1
167 | decay_mult: 1
168 | }
169 | convolution_param {
170 | num_output: 2048
171 | kernel_size: 1
172 | pad: 0
173 | stride: 1
174 | bias_term: false
175 | }
176 | }
177 | layer {
178 | bottom: "res16_match_conv"
179 | bottom: "res16_conv3"
180 | top: "res16_eletwise"
181 | name: "res16_eletwise"
182 | type: "Eltwise"
183 | }
184 | layer {
185 | bottom: "res16_eletwise"
186 | top: "res17_scale"
187 | name: "res17_scale"
188 | type: "Scale"
189 | scale_param {
190 | bias_term: true
191 | }
192 | param {
193 | lr_mult: 0.0
194 | decay_mult: 0.0
195 | }
196 | param {
197 | lr_mult: 0.0
198 | decay_mult: 0.0
199 | }
200 | }
201 | layer {
202 | bottom: "res17_scale"
203 | top: "res17_scale"
204 | name: "res17_relu"
205 | type: "ReLU"
206 | }
207 | layer {
208 | bottom: "res17_scale"
209 | top: "res17_match_conv"
210 | name: "res17_match_conv"
211 | type: "Convolution"
212 | param {
213 | lr_mult: 1
214 | decay_mult: 1
215 | }
216 | convolution_param {
217 | num_output: 4096
218 | kernel_size: 1
219 | pad: 0
220 | stride: 1
221 | bias_term: false
222 | }
223 | }
224 | layer {
225 | bottom: "res17_scale"
226 | top: "res17_conv1"
227 | name: "res17_conv1"
228 | type: "Convolution"
229 | param {
230 | lr_mult: 1
231 | decay_mult: 1
232 | }
233 | convolution_param {
234 | num_output: 1024
235 | kernel_size: 1
236 | pad: 0
237 | stride: 1
238 | bias_term: false
239 | }
240 | }
241 | layer {
242 | bottom: "res17_conv1"
243 | top: "res17_conv1"
244 | name: "res17_conv1_scale"
245 | type: "Scale"
246 | scale_param {
247 | bias_term: true
248 | }
249 | param {
250 | lr_mult: 0.0
251 | decay_mult: 0.0
252 | }
253 | param {
254 | lr_mult: 0.0
255 | decay_mult: 0.0
256 | }
257 | }
258 | layer {
259 | bottom: "res17_conv1"
260 | top: "res17_conv1"
261 | name: "res17_conv1_relu"
262 | type: "ReLU"
263 | }
264 | layer {
265 | bottom: "res17_conv1"
266 | top: "res17_conv2"
267 | name: "res17_conv2"
268 | type: "Convolution"
269 | param {
270 | lr_mult: 1
271 | decay_mult: 1
272 | }
273 | convolution_param {
274 | num_output: 2048
275 | kernel_size: 3
276 | pad: 1
277 | stride: 1
278 | bias_term: false
279 | }
280 | }
281 | layer {
282 | bottom: "res17_conv2"
283 | top: "res17_conv2"
284 | name: "res17_conv2_scale"
285 | type: "Scale"
286 | scale_param {
287 | bias_term: true
288 | }
289 | param {
290 | lr_mult: 0.0
291 | decay_mult: 0.0
292 | }
293 | param {
294 | lr_mult: 0.0
295 | decay_mult: 0.0
296 | }
297 | }
298 | layer {
299 | bottom: "res17_conv2"
300 | top: "res17_conv2"
301 | name: "res17_conv2_relu"
302 | type: "ReLU"
303 | }
304 | layer {
305 | bottom: "res17_conv2"
306 | top: "res17_conv3"
307 | name: "res17_conv3"
308 | type: "Convolution"
309 | param {
310 | lr_mult: 1
311 | decay_mult: 1
312 | }
313 | convolution_param {
314 | num_output: 4096
315 | kernel_size: 1
316 | pad: 0
317 | stride: 1
318 | bias_term: false
319 | }
320 | }
321 | layer {
322 | bottom: "res17_match_conv"
323 | bottom: "res17_conv3"
324 | top: "res17_eletwise"
325 | name: "res17_eletwise"
326 | type: "Eltwise"
327 | }
328 | layer {
329 | bottom: "res17_eletwise"
330 | top: "res17_eletwise"
331 | name: "res17_eletwise_scale"
332 | type: "Scale"
333 | scale_param {
334 | bias_term: true
335 | }
336 | param {
337 | lr_mult: 0.0
338 | decay_mult: 0.0
339 | }
340 | param {
341 | lr_mult: 0.0
342 | decay_mult: 0.0
343 | }
344 | }
345 | layer {
346 | bottom: "res17_eletwise"
347 | top: "res17_eletwise"
348 | name: "res17_eletwise_relu"
349 | type: "ReLU"
350 | }
351 | layer {
352 | bottom: "res17_eletwise"
353 | top: "pool_ave"
354 | name: "pool_ave"
355 | type: "Pooling"
356 | pooling_param {
357 |     global_pooling: true
358 | pool: AVE
359 | }
360 | }
361 | layer {
362 | name: "cls_score"
363 | type: "InnerProduct"
364 | bottom: "pool_ave"
365 | top: "cls_score"
366 | param {
367 | lr_mult: 1
368 | decay_mult: 1
369 | }
370 | param {
371 | lr_mult: 2
372 | decay_mult: 0
373 | }
374 | inner_product_param {
375 | num_output: 21
376 | weight_filler {
377 | type: "msra"
378 | std: 0.01
379 | }
380 | bias_filler {
381 | type: "constant"
382 | value: 0
383 | }
384 | }
385 | }
386 | layer {
387 | name: "bbox_pred"
388 | type: "InnerProduct"
389 | bottom: "pool_ave"
390 | top: "bbox_pred"
391 | param {
392 | lr_mult: 1
393 | decay_mult: 1
394 | }
395 | param {
396 | lr_mult: 2
397 | decay_mult: 0
398 | }
399 | inner_product_param {
400 | num_output: 84
401 | weight_filler {
402 | type: "msra"
403 | std: 0.01
404 | }
405 | bias_filler {
406 | type: "constant"
407 | value: 0
408 | }
409 | }
410 | }
411 | layer {
412 | name: "cls_prob"
413 | type: "Softmax"
414 | bottom: "cls_score"
415 | top: "cls_prob"
416 | }
417 |
418 |
--------------------------------------------------------------------------------
/cls/vgg/deploy_vgg13bn-pytorch.prototxt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 224
6 | dim: 224
7 | }
8 |
9 | layer {
10 | name: "conv1_1"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv1_1"
14 | convolution_param {
15 | bias_term: true
16 | num_output: 64
17 | pad: 1
18 | kernel_size: 3
19 | stride: 1
20 | }
21 | }
22 | layer {
23 | bottom: "conv1_1"
24 | top: "conv1_1"
25 | name: "conv1_1_bn"
26 | type: "BatchNorm"
27 | batch_norm_param {
28 | use_global_stats: true
29 | }
30 | }
31 | layer {
32 | bottom: "conv1_1"
33 | top: "conv1_1"
34 | name: "conv1_1_scale"
35 | type: "Scale"
36 | scale_param {
37 | bias_term: true
38 | }
39 | }
40 | layer {
41 | name: "relu1_1"
42 | type: "ReLU"
43 | bottom: "conv1_1"
44 | top: "conv1_1"
45 | }
46 |
47 | layer {
48 | name: "conv1_2"
49 | type: "Convolution"
50 | bottom: "conv1_1"
51 | top: "conv1_2"
52 | convolution_param {
53 | bias_term: true
54 | num_output: 64
55 | pad: 1
56 | kernel_size: 3
57 | stride: 1
58 | }
59 | }
60 | layer {
61 | bottom: "conv1_2"
62 | top: "conv1_2"
63 | name: "conv1_2_bn"
64 | type: "BatchNorm"
65 | batch_norm_param {
66 | use_global_stats: true
67 | }
68 | }
69 | layer {
70 | bottom: "conv1_2"
71 | top: "conv1_2"
72 | name: "conv1_2_scale"
73 | type: "Scale"
74 | scale_param {
75 | bias_term: true
76 | }
77 | }
78 | layer {
79 | name: "relu1_2"
80 | type: "ReLU"
81 | bottom: "conv1_2"
82 | top: "conv1_2"
83 | }
84 |
85 | layer {
86 | name: "pool1"
87 | type: "Pooling"
88 | bottom: "conv1_2"
89 | top: "pool1"
90 | pooling_param {
91 | pool: MAX
92 | kernel_size: 2
93 | stride: 2
94 | }
95 | }
96 |
97 | layer {
98 | name: "conv2_1"
99 | type: "Convolution"
100 | bottom: "pool1"
101 | top: "conv2_1"
102 | convolution_param {
103 | bias_term: true
104 | num_output: 128
105 | pad: 1
106 | kernel_size: 3
107 | stride: 1
108 | }
109 | }
110 | layer {
111 | bottom: "conv2_1"
112 | top: "conv2_1"
113 | name: "conv2_1_bn"
114 | type: "BatchNorm"
115 | batch_norm_param {
116 | use_global_stats: true
117 | }
118 | }
119 | layer {
120 | bottom: "conv2_1"
121 | top: "conv2_1"
122 | name: "conv2_1_scale"
123 | type: "Scale"
124 | scale_param {
125 | bias_term: true
126 | }
127 | }
128 | layer {
129 | name: "relu2_1"
130 | type: "ReLU"
131 | bottom: "conv2_1"
132 | top: "conv2_1"
133 | }
134 |
135 | layer {
136 | name: "conv2_2"
137 | type: "Convolution"
138 | bottom: "conv2_1"
139 | top: "conv2_2"
140 | convolution_param {
141 | bias_term: true
142 | num_output: 128
143 | pad: 1
144 | kernel_size: 3
145 | stride: 1
146 | }
147 | }
148 | layer {
149 | bottom: "conv2_2"
150 | top: "conv2_2"
151 | name: "conv2_2_bn"
152 | type: "BatchNorm"
153 | batch_norm_param {
154 | use_global_stats: true
155 | }
156 | }
157 | layer {
158 | bottom: "conv2_2"
159 | top: "conv2_2"
160 | name: "conv2_2_scale"
161 | type: "Scale"
162 | scale_param {
163 | bias_term: true
164 | }
165 | }
166 | layer {
167 | name: "relu2_2"
168 | type: "ReLU"
169 | bottom: "conv2_2"
170 | top: "conv2_2"
171 | }
172 |
173 | layer {
174 | name: "pool2"
175 | type: "Pooling"
176 | bottom: "conv2_2"
177 | top: "pool2"
178 | pooling_param {
179 | pool: MAX
180 | kernel_size: 2
181 | stride: 2
182 | }
183 | }
184 |
185 | layer {
186 | name: "conv3_1"
187 | type: "Convolution"
188 | bottom: "pool2"
189 | top: "conv3_1"
190 | convolution_param {
191 | bias_term: true
192 | num_output: 256
193 | pad: 1
194 | kernel_size: 3
195 | stride: 1
196 | }
197 | }
198 | layer {
199 | bottom: "conv3_1"
200 | top: "conv3_1"
201 | name: "conv3_1_bn"
202 | type: "BatchNorm"
203 | batch_norm_param {
204 | use_global_stats: true
205 | }
206 | }
207 | layer {
208 | bottom: "conv3_1"
209 | top: "conv3_1"
210 | name: "conv3_1_scale"
211 | type: "Scale"
212 | scale_param {
213 | bias_term: true
214 | }
215 | }
216 | layer {
217 | name: "relu3_1"
218 | type: "ReLU"
219 | bottom: "conv3_1"
220 | top: "conv3_1"
221 | }
222 |
223 | layer {
224 | name: "conv3_2"
225 | type: "Convolution"
226 | bottom: "conv3_1"
227 | top: "conv3_2"
228 | convolution_param {
229 | bias_term: true
230 | num_output: 256
231 | pad: 1
232 | kernel_size: 3
233 | stride: 1
234 | }
235 | }
236 | layer {
237 | bottom: "conv3_2"
238 | top: "conv3_2"
239 | name: "conv3_2_bn"
240 | type: "BatchNorm"
241 | batch_norm_param {
242 | use_global_stats: true
243 | }
244 | }
245 | layer {
246 | bottom: "conv3_2"
247 | top: "conv3_2"
248 | name: "conv3_2_scale"
249 | type: "Scale"
250 | scale_param {
251 | bias_term: true
252 | }
253 | }
254 | layer {
255 | name: "relu3_2"
256 | type: "ReLU"
257 | bottom: "conv3_2"
258 | top: "conv3_2"
259 | }
260 |
261 | layer {
262 | name: "pool3"
263 | type: "Pooling"
264 | bottom: "conv3_2"
265 | top: "pool3"
266 | pooling_param {
267 | pool: MAX
268 | kernel_size: 2
269 | stride: 2
270 | }
271 | }
272 |
273 | layer {
274 | name: "conv4_1"
275 | type: "Convolution"
276 | bottom: "pool3"
277 | top: "conv4_1"
278 | convolution_param {
279 | bias_term: true
280 | num_output: 512
281 | pad: 1
282 | kernel_size: 3
283 | stride: 1
284 | }
285 | }
286 | layer {
287 | bottom: "conv4_1"
288 | top: "conv4_1"
289 | name: "conv4_1_bn"
290 | type: "BatchNorm"
291 | batch_norm_param {
292 | use_global_stats: true
293 | }
294 | }
295 | layer {
296 | bottom: "conv4_1"
297 | top: "conv4_1"
298 | name: "conv4_1_scale"
299 | type: "Scale"
300 | scale_param {
301 | bias_term: true
302 | }
303 | }
304 | layer {
305 | name: "relu4_1"
306 | type: "ReLU"
307 | bottom: "conv4_1"
308 | top: "conv4_1"
309 | }
310 |
311 | layer {
312 | name: "conv4_2"
313 | type: "Convolution"
314 | bottom: "conv4_1"
315 | top: "conv4_2"
316 | convolution_param {
317 | bias_term: true
318 | num_output: 512
319 | pad: 1
320 | kernel_size: 3
321 | stride: 1
322 | }
323 | }
324 | layer {
325 | bottom: "conv4_2"
326 | top: "conv4_2"
327 | name: "conv4_2_bn"
328 | type: "BatchNorm"
329 | batch_norm_param {
330 | use_global_stats: true
331 | }
332 | }
333 | layer {
334 | bottom: "conv4_2"
335 | top: "conv4_2"
336 | name: "conv4_2_scale"
337 | type: "Scale"
338 | scale_param {
339 | bias_term: true
340 | }
341 | }
342 | layer {
343 | name: "relu4_2"
344 | type: "ReLU"
345 | bottom: "conv4_2"
346 | top: "conv4_2"
347 | }
348 |
349 | layer {
350 | name: "pool4"
351 | type: "Pooling"
352 | bottom: "conv4_2"
353 | top: "pool4"
354 | pooling_param {
355 | pool: MAX
356 | kernel_size: 2
357 | stride: 2
358 | }
359 | }
360 |
361 | layer {
362 | name: "conv5_1"
363 | type: "Convolution"
364 | bottom: "pool4"
365 | top: "conv5_1"
366 | convolution_param {
367 | bias_term: true
368 | num_output: 512
369 | pad: 1
370 | kernel_size: 3
371 | stride: 1
372 | }
373 | }
374 | layer {
375 | bottom: "conv5_1"
376 | top: "conv5_1"
377 | name: "conv5_1_bn"
378 | type: "BatchNorm"
379 | batch_norm_param {
380 | use_global_stats: true
381 | }
382 | }
383 | layer {
384 | bottom: "conv5_1"
385 | top: "conv5_1"
386 | name: "conv5_1_scale"
387 | type: "Scale"
388 | scale_param {
389 | bias_term: true
390 | }
391 | }
392 | layer {
393 | name: "relu5_1"
394 | type: "ReLU"
395 | bottom: "conv5_1"
396 | top: "conv5_1"
397 | }
398 |
399 | layer {
400 | name: "conv5_2"
401 | type: "Convolution"
402 | bottom: "conv5_1"
403 | top: "conv5_2"
404 | convolution_param {
405 | bias_term: true
406 | num_output: 512
407 | pad: 1
408 | kernel_size: 3
409 | stride: 1
410 | }
411 | }
412 | layer {
413 | bottom: "conv5_2"
414 | top: "conv5_2"
415 | name: "conv5_2_bn"
416 | type: "BatchNorm"
417 | batch_norm_param {
418 | use_global_stats: true
419 | }
420 | }
421 | layer {
422 | bottom: "conv5_2"
423 | top: "conv5_2"
424 | name: "conv5_2_scale"
425 | type: "Scale"
426 | scale_param {
427 | bias_term: true
428 | }
429 | }
430 | layer {
431 | name: "relu5_2"
432 | type: "ReLU"
433 | bottom: "conv5_2"
434 | top: "conv5_2"
435 | }
436 |
437 | layer {
438 | name: "pool5"
439 | type: "Pooling"
440 | bottom: "conv5_2"
441 | top: "pool5"
442 | pooling_param {
443 | pool: MAX
444 | kernel_size: 2
445 | stride: 2
446 | }
447 | }
448 |
449 | layer {
450 | bottom: "pool5"
451 | top: "fc6"
452 | name: "fc6"
453 | type: "InnerProduct"
454 | inner_product_param {
455 | num_output: 4096
456 | }
457 | }
458 |
459 | layer {
460 | name: "relu6"
461 | type: "ReLU"
462 | bottom: "fc6"
463 | top: "fc6"
464 | }
465 |
466 | layer {
467 | name: "dropout6"
468 | type: "Dropout"
469 | bottom: "fc6"
470 | top: "fc6"
471 | dropout_param {
472 | dropout_ratio: 0.5
473 | }
474 | }
475 |
476 | layer {
477 | bottom: "fc6"
478 | top: "fc7"
479 | name: "fc7"
480 | type: "InnerProduct"
481 | inner_product_param {
482 | num_output: 4096
483 | }
484 | }
485 |
486 | layer {
487 | name: "relu7"
488 | type: "ReLU"
489 | bottom: "fc7"
490 | top: "fc7"
491 | }
492 |
493 | layer {
494 | name: "dropout7"
495 | type: "Dropout"
496 | bottom: "fc7"
497 | top: "fc7"
498 | dropout_param {
499 | dropout_ratio: 0.5
500 | }
501 | }
502 |
503 | layer {
504 | bottom: "fc7"
505 | top: "classifier"
506 | name: "classifier"
507 | type: "InnerProduct"
508 | inner_product_param {
509 | num_output: 1000
510 | }
511 | }
512 |
513 | layer {
514 | name: "prob"
515 | type: "Softmax"
516 | bottom: "classifier"
517 | top: "prob"
518 | }
519 |
--------------------------------------------------------------------------------
/cls/README.md:
--------------------------------------------------------------------------------
1 | ## CLS (Classification)
2 |
3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluation and fine-tuning.
4 |
5 | ### Disclaimer
6 |
7 | Most of the models are converted from other projects; the main contribution belongs to the original authors.
8 |
9 | Project links:
10 |
11 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [SENet](https://github.com/hujie-frank/SENet)
12 |
13 |
14 | ### Performance on the ImageNet validation set
15 | **1. Top-1/5 error of pre-train models in this repository.**
16 |
17 | Network|224/299<br/>(single-crop)|224/299<br/>(12-crop)|320/395<br/>(single-crop)|320/395<br/>(12-crop)
18 | :---:|:---:|:---:|:---:|:---:
19 | resnet18-priv| 29.62/10.38 | 26.69/8.64 | 27.54/8.98 | 26.23/8.21
20 | resnext26-32x4d-priv| 24.93/7.75 | 23.54/6.89 | 24.20/7.21 | 23.19/6.60
21 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57
22 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32
23 | resnet269-v2| 19.71/5.00 | 18.25/4.20 | 18.70/4.33 | 17.87/3.85
24 | resnet38a| 20.66/5.27 | ../.. | 19.25/4.66 | ../..
25 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24
26 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39
27 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 |18.12/3.92
28 | inception-resnet-v2| 19.86/4.83 | 18.46/4.08 | 18.75/4.02 | 18.15/3.71
29 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03
30 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59
31 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31
32 | wrn50-2<br/>(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87
33 | airx50-24x4d| 22.39/6.23 | 20.36/5.19 | 20.88/5.33 | 19.97/4.92
34 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43
35 | air152| 20.38/5.11 | 18.46/4.26 | 19.08/4.40 | 18.53/4.00
36 | airx101-32x4d| 21.15/5.74 | 19.43/4.86 | 19.61/4.93 | 18.90/4.49
37 | dpn-68-extra| 22.56/6.24 | 20.48/4.99 | 20.99/5.25 | 20.09/4.73
38 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24
39 | dpn-98| 20.27/5.28 | 18.57/4.42 | 18.87/4.43 | 18.21/4.11
40 | dpn-131| 20.00/5.24 | 18.52/4.28 | 18.63/4.31 | 17.99/3.92
41 | dpn-107-extra| 19.70/5.06 | ../.. | 18.41/4.25 | ../..
42 | se-inception-v2<br/>(se-inception-bn)| 23.64/7.04 | 21.57/5.86 | 21.61/5.87 | 20.85/5.38
43 | se-resnet50| 22.39/6.37 | 20.61/5.34 | 20.49/5.22 | 20.02/4.85
44 | se-resnet50-hik| 21.98/5.80 | 20.06/4.88 | 20.51/5.04 | 19.92/4.68
45 | se-resnet101| 21.76/5.72 | 19.96/4.79 | 19.97/4.78 | 19.34/4.41
46 | se-resnet152| 21.34/5.54 | 19.56/4.66 | 19.34/4.59 | 18.83/4.32
47 | se-resnext50-32x4d| 20.96/5.53 | 19.39/4.69 | 19.36/4.66 | 18.70/4.38
48 | se-resnext101-32x4d| 19.83/4.95 | 18.44/4.16 | 18.14/4.08 | 17.68/3.86
49 | senet<br/>(se-resnext152-64x4d)| 18.67/4.47 | 17.40/3.69 | 17.28/3.78 | 16.80/3.47
50 |
51 | - The resnet18-priv and resnext26-32x4d-priv models were trained with [pytorch](https://github.com/soeaver/pytorch-classification) by bupt-priv.
52 | - The pre-trained models are tested on the original [caffe](https://github.com/BVLC/caffe) with [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py), **but ceil_mode: false (pooling_layer) is used for the models converted from Torch; see https://github.com/BVLC/caffe/pull/3057/files for details**. Removing ceil_mode: false costs about 1% top-1 accuracy (see the output-size sketch after this list).
53 | - Crop sizes of 224x224 (base_size=256) and 320x320 (base_size=320) are used for resnet-v2/resnext/wrn, and 299x299 (base_size=320) and 395x395 (base_size=395) for inception; a center-crop sketch follows below.
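
A quick illustration (a sketch, not repository code) of why ceil_mode matters: for 3x3/stride-2 pooling on a 112x112 feature map, ceil and floor rounding disagree by one output pixel, which shifts every later feature map.

```
import math

def pool_out(size, kernel=3, stride=2, pad=0, ceil_mode=True):
    # Caffe's default pooling rounds up; Torch-style pooling rounds down.
    rnd = math.ceil if ceil_mode else math.floor
    return int(rnd((size + 2 * pad - kernel) / float(stride))) + 1

print(pool_out(112, ceil_mode=True))   # 56 (Caffe default)
print(pool_out(112, ceil_mode=False))  # 55 (ceil_mode: false, as in Torch-converted models)
```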
54 |
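The base_size/crop_size pairing corresponds to the usual resize-shorter-side-then-center-crop protocol; a minimal sketch with OpenCV (function and variable names are illustrative, not from evaluation_cls.py):

```
import cv2

def center_crop(img, base_size=256, crop_size=224):
    # Resize so the shorter side equals base_size, then take the central crop.
    h, w = img.shape[:2]
    ratio = base_size / float(min(h, w))
    img = cv2.resize(img, (int(round(w * ratio)), int(round(h * ratio))))
    h, w = img.shape[:2]
    off_h, off_w = (h - crop_size) // 2, (w - crop_size) // 2
    return img[off_h:off_h + crop_size, off_w:off_w + crop_size]
```
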
55 | **2. Top-1/5 accuracy with different crop sizes.**
56 | 
57 | - Figure: Accuracy curves of inception_v3 (left) and resnet101_v2 (right) with different crop sizes.
58 |
59 | **3. Download url and forward/backward time cost for each model.**
60 |
61 | Forward/Backward time cost is evaluated with one image/mini-batch using cuDNN 5.1 on a Pascal Titan X GPU.
62 |
63 | We use
64 | ```
65 | ~/caffe/build/tools/caffe time -model deploy.prototxt -gpu 0 -iterations 1000
66 | ```
67 | to test the forward/backward time cost; the result differs noticeably from the time cost measured by [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py).
68 |
69 | Network|F/B(224/299)|F/B(320/395)|Download<br/>(BaiduCloud)|Download<br/>(GoogleDrive)|Source
70 | :---:|:---:|:---:|:---:|:---:|:---:
71 | resnet18-priv | 4.48/5.07ms | 4.99/7.01ms | [44.6MB](https://pan.baidu.com/s/1hrYc3La)|44.6MB|[pytorch-cls](https://github.com/soeaver/pytorch-classification)
72 | resnext26-32x4d-priv | 8.53/10.12ms | 10.55/13.46ms | [58.9MB](https://pan.baidu.com/s/1dFzmUOh)|[58.9MB](https://drive.google.com/open?id=0B9mkjlmP0d7zZEh4dzZ3TVZUb2M)|[pytorch-cls](https://github.com/soeaver/pytorch-classification)
73 | resnet101-v2| 22.31/22.75ms | 26.02/29.50ms | [170.3MB](https://pan.baidu.com/s/1kVQDHFx)|[170.3MB](https://drive.google.com/open?id=0B9mkjlmP0d7zRlhISks0VktGOGs)|[craftGBD](https://github.com/craftGBD/craftGBD)
74 | resnet152-v2| 32.11/32.54ms | 37.46/41.84ms | [230.2MB](https://pan.baidu.com/s/1dFIc4vB)|[230.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOXhrb1EyYVRHOEk)|[craftGBD](https://github.com/craftGBD/craftGBD)
75 | resnet269-v2| 58.20/59.15ms | 69.43/77.26ms | [390.4MB](https://pan.baidu.com/s/1qYbICs0)|[390.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOGFxcTMySHN6bUE)|[craftGBD](https://github.com/craftGBD/craftGBD)
76 | inception-v3| 21.79/19.82ms | 22.14/24.88ms | [91.1MB](https://pan.baidu.com/s/1boC0HEf)|[91.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTEJmNEh6c0RfYzg)|[mxnet](https://github.com/dmlc/mxnet-model-gallery/blob/master/imagenet-1k-inception-v3.md)
77 | xception | 14.03/30.39ms | 19.46/48.64ms | [87.4MB](https://pan.baidu.com/s/1gfiTShd)|87.4MB|[keras-models](https://github.com/fchollet/deep-learning-models)
78 | inception-v4| 32.96/32.19ms | 36.04/41.91ms | [163.1MB](https://pan.baidu.com/s/1c6D150)|[163.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zUEJ3aEJ2b3J0RFU)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim)
79 | inception-resnet-v2| 49.06/54.83ms | 54.06/66.38ms | [213.4MB](https://pan.baidu.com/s/1jHPJCX4)|[213.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zc3A4NWlQQzdoM28)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim)
80 | resnext50-32x4d| 17.29/20.08ms | 19.02/23.81ms | [95.8MB](https://pan.baidu.com/s/1kVqgfJL)|[95.8MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYVgwanhVWnhrYlE)|[facebookresearch](https://github.com/facebookresearch/ResNeXt)
81 | resnext101-32x4d| 30.73/35.75ms | 34.33/41.02ms | [169.1MB](https://pan.baidu.com/s/1hswrNUG)|[169.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTzYyelgyYlpOU3c)|[facebookresearch](https://github.com/facebookresearch/ResNeXt)
82 | resnext101-64x4d| 42.07/64.58ms | 51.99/77.71ms | [319.2MB](https://pan.baidu.com/s/1pLhk0Zp)|[319.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zQ0ZZOENnSFdQWnc)|[facebookresearch](https://github.com/facebookresearch/ResNeXt)
83 | wrn50-2<br/>(resnet50_1x128d)| 16.48/25.28ms | 20.99/35.04ms | [263.1MB](https://pan.baidu.com/s/1nvhoCsh)|[263.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYW40dUMxS3VPclU)|[szagoruyko](https://github.com/szagoruyko/wide-residual-networks)
84 | airx50-24x4d| 23.59/24.80ms | 26.64/30.92ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification)
85 | air101| 35.78/35.94ms | 39.69/45.52ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification)
86 | airx101-32x4d| 49.43/55.52ms | 54.64/66.31ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification)
87 | dpn-68| ../..ms | ../..ms | [48.4MB](https://pan.baidu.com/s/1bphINV5) | .. |[DPNs](https://github.com/cypw/DPNs)
88 | dpn-92| 29.71/30.68ms | 35.19/37.13ms | [144.2MB](https://pan.baidu.com/s/1pL0VuWV)|[144.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zaWVKWFd2OXpRTVU)|[DPNs](https://github.com/cypw/DPNs)
89 | dpn-98| 36.24/44.06ms | 42.84/53.50ms | [235.6MB](https://pan.baidu.com/s/1pKHBRlD) | .. |[DPNs](https://github.com/cypw/DPNs)
90 | dpn-107| 45.21/59.77ms | 56.12/77.78ms | [332.4MB](https://pan.baidu.com/s/1i5b0Uih) | .. |[DPNs](https://github.com/cypw/DPNs)
91 | dpn-131| 48.20/59.43ms | 57.66/72.43ms | [303.3MB](https://pan.baidu.com/s/1miOdMHi) | .. |[DPNs](https://github.com/cypw/DPNs)
92 | se-inception-v2| 14.66/10.63ms | 15.71/13.52ms | .. | .. |[senet](https://github.com/hujie-frank/SENet)
93 | se-resnet50| 15.29/14.20ms | 17.96/19.69ms | .. | .. |[senet](https://github.com/hujie-frank/SENet)
94 |
95 | - To speed up xception, we adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files).
96 |
97 | ### Check the performance
98 | **1. Download the ILSVRC 2012 classification val set [6.3GB](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar), and put the extracted images into the directory:**
99 |
100 | ~/Database/ILSVRC2012
101 |
102 | **2. Modify the parameter settings**
103 |
104 | Network|val_file|mean_value|std
105 | :---:|:---:|:---:|:---:
106 | resnet-v2(101/152/269)| ILSVRC2012_val | [102.98, 115.947, 122.772] | [1.0, 1.0, 1.0]
107 | resnet10/18, resnext, air(x) | ILSVRC2012_val | [103.52, 116.28, 123.675] | [57.375, 57.12, 58.395]
108 | inception-v3| **ILSVRC2015_val** | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0]
109 | inception-v2, xception<br/>inception-v4, inception-resnet-v2 | ILSVRC2012_val | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0]
110 | dpn(68/92/98/131/107)| ILSVRC2012_val | [104.0, 117.0, 124.0] | [59.88, 59.88, 59.88]
111 | official senet| **ILSVRC2015_val** | [104.0, 117.0, 123.0] | [1.0, 1.0, 1.0]
112 |
113 |
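The mean_value and std entries are applied per channel (BGR order, as with cv2-loaded images) before the forward pass; a minimal sketch of the normalization, using the resnet-v2 row above (names are illustrative):

```
import numpy as np

# Example settings from the table above (BGR order).
mean_value = np.array([102.98, 115.947, 122.772], dtype=np.float32)
std = np.array([1.0, 1.0, 1.0], dtype=np.float32)

def preprocess(img):
    # img: HxWx3 BGR uint8 array -> CxHxW float32 blob for Caffe.
    img = (img.astype(np.float32) - mean_value) / std
    return img.transpose((2, 0, 1))
```
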
114 | **3. Then run evaluation_cls.py:**
115 |
116 | python evaluation_cls.py
117 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "res31_scale"
2 | input_shape {
3 | dim: 1
4 | dim: 1024
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 |
16 | #============== RCNN ===============
17 | layer {
18 | name: "roi_pool"
19 | type: "ROIPooling"
20 | bottom: "res31_scale"
21 | bottom: "rois"
22 | top: "roi_pool"
23 | roi_pooling_param {
24 | pooled_w: 14
25 | pooled_h: 14
26 | spatial_scale: 0.062500
27 | }
28 | }
29 |
30 | layer {
31 | name: "res31_conv1"
32 | type: "Convolution"
33 | bottom: "roi_pool"
34 | top: "res31_conv1"
35 | param {
36 | lr_mult: 1
37 | decay_mult: 1
38 | }
39 | convolution_param {
40 | bias_term: false
41 | num_output: 512
42 | pad: 0
43 | kernel_size: 1
44 | stride: 1
45 | }
46 | }
47 |
48 | layer {
49 | name: "res31_conv1_scale"
50 | type: "Scale"
51 | bottom: "res31_conv1"
52 | top: "res31_conv1"
53 | scale_param {
54 | bias_term: true
55 | }
56 | param {
57 | lr_mult: 0.0
58 | decay_mult: 0.0
59 | }
60 | param {
61 | lr_mult: 0.0
62 | decay_mult: 0.0
63 | }
64 | }
65 | layer {
66 | name: "res31_conv1_relu"
67 | type: "ReLU"
68 | bottom: "res31_conv1"
69 | top: "res31_conv1"
70 | }
71 | layer {
72 | name: "res31_conv2"
73 | type: "Convolution"
74 | bottom: "res31_conv1"
75 | top: "res31_conv2"
76 | param {
77 | lr_mult: 1
78 | decay_mult: 1
79 | }
80 | convolution_param {
81 | bias_term: false
82 | num_output: 512
83 | pad: 1
84 | kernel_size: 3
85 | stride: 2
86 | }
87 | }
88 |
89 | layer {
90 | name: "res31_conv2_scale"
91 | type: "Scale"
92 | bottom: "res31_conv2"
93 | top: "res31_conv2"
94 | scale_param {
95 | bias_term: true
96 | }
97 | param {
98 | lr_mult: 0.0
99 | decay_mult: 0.0
100 | }
101 | param {
102 | lr_mult: 0.0
103 | decay_mult: 0.0
104 | }
105 | }
106 | layer {
107 | name: "res31_conv2_relu"
108 | type: "ReLU"
109 | bottom: "res31_conv2"
110 | top: "res31_conv2"
111 | }
112 | layer {
113 | name: "res31_conv3"
114 | type: "Convolution"
115 | bottom: "res31_conv2"
116 | top: "res31_conv3"
117 | param {
118 | lr_mult: 1
119 | decay_mult: 1
120 | }
121 | convolution_param {
122 | bias_term: false
123 | num_output: 2048
124 | pad: 0
125 | kernel_size: 1
126 | stride: 1
127 | }
128 | }
129 | layer {
130 | name: "res31_match_conv"
131 | type: "Convolution"
132 | bottom: "roi_pool"
133 | top: "res31_match_conv"
134 | param {
135 | lr_mult: 1
136 | decay_mult: 1
137 | }
138 | convolution_param {
139 | bias_term: false
140 | num_output: 2048
141 | pad: 0
142 | kernel_size: 1
143 | stride: 2
144 | }
145 | }
146 | layer {
147 | name: "res31_eletwise"
148 | type: "Eltwise"
149 | bottom: "res31_match_conv"
150 | bottom: "res31_conv3"
151 | top: "res31_eletwise"
152 | eltwise_param {
153 | operation: SUM
154 | }
155 | }
156 |
157 | layer {
158 | name: "res32_scale"
159 | type: "Scale"
160 | bottom: "res31_eletwise"
161 | top: "res32_scale"
162 | scale_param {
163 | bias_term: true
164 | }
165 | param {
166 | lr_mult: 0.0
167 | decay_mult: 0.0
168 | }
169 | param {
170 | lr_mult: 0.0
171 | decay_mult: 0.0
172 | }
173 | }
174 | layer {
175 | name: "res32_relu"
176 | type: "ReLU"
177 | bottom: "res32_scale"
178 | top: "res32_scale"
179 | }
180 | layer {
181 | name: "res32_conv1"
182 | type: "Convolution"
183 | bottom: "res32_scale"
184 | top: "res32_conv1"
185 | param {
186 | lr_mult: 1
187 | decay_mult: 1
188 | }
189 | convolution_param {
190 | bias_term: false
191 | num_output: 512
192 | pad: 0
193 | kernel_size: 1
194 | stride: 1
195 | }
196 | }
197 |
198 | layer {
199 | name: "res32_conv1_scale"
200 | type: "Scale"
201 | bottom: "res32_conv1"
202 | top: "res32_conv1"
203 | scale_param {
204 | bias_term: true
205 | }
206 | param {
207 | lr_mult: 0.0
208 | decay_mult: 0.0
209 | }
210 | param {
211 | lr_mult: 0.0
212 | decay_mult: 0.0
213 | }
214 | }
215 | layer {
216 | name: "res32_conv1_relu"
217 | type: "ReLU"
218 | bottom: "res32_conv1"
219 | top: "res32_conv1"
220 | }
221 | layer {
222 | name: "res32_conv2"
223 | type: "Convolution"
224 | bottom: "res32_conv1"
225 | top: "res32_conv2"
226 | param {
227 | lr_mult: 1
228 | decay_mult: 1
229 | }
230 | convolution_param {
231 | bias_term: false
232 | num_output: 512
233 | pad: 1
234 | kernel_size: 3
235 | stride: 1
236 | }
237 | }
238 |
239 | layer {
240 | name: "res32_conv2_scale"
241 | type: "Scale"
242 | bottom: "res32_conv2"
243 | top: "res32_conv2"
244 | scale_param {
245 | bias_term: true
246 | }
247 | param {
248 | lr_mult: 0.0
249 | decay_mult: 0.0
250 | }
251 | param {
252 | lr_mult: 0.0
253 | decay_mult: 0.0
254 | }
255 | }
256 | layer {
257 | name: "res32_conv2_relu"
258 | type: "ReLU"
259 | bottom: "res32_conv2"
260 | top: "res32_conv2"
261 | }
262 | layer {
263 | name: "res32_conv3"
264 | type: "Convolution"
265 | bottom: "res32_conv2"
266 | top: "res32_conv3"
267 | param {
268 | lr_mult: 1
269 | decay_mult: 1
270 | }
271 | convolution_param {
272 | bias_term: false
273 | num_output: 2048
274 | pad: 0
275 | kernel_size: 1
276 | stride: 1
277 | }
278 | }
279 | layer {
280 | name: "res32_eletwise"
281 | type: "Eltwise"
282 | bottom: "res31_eletwise"
283 | bottom: "res32_conv3"
284 | top: "res32_eletwise"
285 | eltwise_param {
286 | operation: SUM
287 | }
288 | }
289 |
290 | layer {
291 | name: "res33_scale"
292 | type: "Scale"
293 | bottom: "res32_eletwise"
294 | top: "res33_scale"
295 | scale_param {
296 | bias_term: true
297 | }
298 | param {
299 | lr_mult: 0.0
300 | decay_mult: 0.0
301 | }
302 | param {
303 | lr_mult: 0.0
304 | decay_mult: 0.0
305 | }
306 | }
307 | layer {
308 | name: "res33_relu"
309 | type: "ReLU"
310 | bottom: "res33_scale"
311 | top: "res33_scale"
312 | }
313 | layer {
314 | name: "res33_conv1"
315 | type: "Convolution"
316 | bottom: "res33_scale"
317 | top: "res33_conv1"
318 | param {
319 | lr_mult: 1
320 | decay_mult: 1
321 | }
322 | convolution_param {
323 | bias_term: false
324 | num_output: 512
325 | pad: 0
326 | kernel_size: 1
327 | stride: 1
328 | }
329 | }
330 |
331 | layer {
332 | name: "res33_conv1_scale"
333 | type: "Scale"
334 | bottom: "res33_conv1"
335 | top: "res33_conv1"
336 | scale_param {
337 | bias_term: true
338 | }
339 | param {
340 | lr_mult: 0.0
341 | decay_mult: 0.0
342 | }
343 | param {
344 | lr_mult: 0.0
345 | decay_mult: 0.0
346 | }
347 | }
348 | layer {
349 | name: "res33_conv1_relu"
350 | type: "ReLU"
351 | bottom: "res33_conv1"
352 | top: "res33_conv1"
353 | }
354 | layer {
355 | name: "res33_conv2"
356 | type: "Convolution"
357 | bottom: "res33_conv1"
358 | top: "res33_conv2"
359 | param {
360 | lr_mult: 1
361 | decay_mult: 1
362 | }
363 | convolution_param {
364 | bias_term: false
365 | num_output: 512
366 | pad: 1
367 | kernel_size: 3
368 | stride: 1
369 | }
370 | }
371 |
372 | layer {
373 | name: "res33_conv2_scale"
374 | type: "Scale"
375 | bottom: "res33_conv2"
376 | top: "res33_conv2"
377 | scale_param {
378 | bias_term: true
379 | }
380 | param {
381 | lr_mult: 0.0
382 | decay_mult: 0.0
383 | }
384 | param {
385 | lr_mult: 0.0
386 | decay_mult: 0.0
387 | }
388 | }
389 | layer {
390 | name: "res33_conv2_relu"
391 | type: "ReLU"
392 | bottom: "res33_conv2"
393 | top: "res33_conv2"
394 | }
395 | layer {
396 | name: "res33_conv3"
397 | type: "Convolution"
398 | bottom: "res33_conv2"
399 | top: "res33_conv3"
400 | param {
401 | lr_mult: 1
402 | decay_mult: 1
403 | }
404 | convolution_param {
405 | bias_term: false
406 | num_output: 2048
407 | pad: 0
408 | kernel_size: 1
409 | stride: 1
410 | }
411 | }
412 | layer {
413 | name: "res33_eletwise"
414 | type: "Eltwise"
415 | bottom: "res32_eletwise"
416 | bottom: "res33_conv3"
417 | top: "res33_eletwise"
418 | eltwise_param {
419 | operation: SUM
420 | }
421 | }
422 |
423 | layer {
424 | name: "res33_eletwise_scale"
425 | type: "Scale"
426 | bottom: "res33_eletwise"
427 | top: "res33_eletwise_scale"
428 | scale_param {
429 | bias_term: true
430 | }
431 | param {
432 | lr_mult: 0.0
433 | decay_mult: 0.0
434 | }
435 | param {
436 | lr_mult: 0.0
437 | decay_mult: 0.0
438 | }
439 | }
440 | layer {
441 | name: "res33_eletwise_relu"
442 | type: "ReLU"
443 | bottom: "res33_eletwise_scale"
444 | top: "res33_eletwise_scale"
445 | }
446 |
447 | layer {
448 | bottom: "res33_eletwise_scale"
449 | top: "pool5"
450 | name: "pool5"
451 | type: "Pooling"
452 | pooling_param {
453 | pool: AVE
454 | global_pooling: true
455 | }
456 | }
457 | layer {
458 | name: "cls_score"
459 | type: "InnerProduct"
460 | bottom: "pool5"
461 | top: "cls_score"
462 | param {
463 | lr_mult: 1
464 | decay_mult: 1
465 | }
466 | param {
467 | lr_mult: 2
468 | decay_mult: 0
469 | }
470 | inner_product_param {
471 | num_output: 21
472 | weight_filler {
473 | type: "msra"
474 | std: 0.01
475 | }
476 | bias_filler {
477 | type: "constant"
478 | value: 0
479 | }
480 | }
481 | }
482 | layer {
483 | name: "bbox_pred"
484 | type: "InnerProduct"
485 | bottom: "pool5"
486 | top: "bbox_pred"
487 | param {
488 | lr_mult: 1
489 | decay_mult: 1
490 | }
491 | param {
492 | lr_mult: 2
493 | decay_mult: 0
494 | }
495 | inner_product_param {
496 | num_output: 84
497 | weight_filler {
498 | type: "msra"
499 | std: 0.01
500 | }
501 | bias_filler {
502 | type: "constant"
503 | value: 0
504 | }
505 | }
506 | }
507 | layer {
508 | name: "cls_prob"
509 | type: "Softmax"
510 | bottom: "cls_score"
511 | top: "cls_prob"
512 | }
513 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/resnet152-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt:
--------------------------------------------------------------------------------
1 | input: "res48_scale"
2 | input_shape {
3 | dim: 1
4 | dim: 1024
5 | dim: 40
6 | dim: 40
7 | }
8 |
9 | input: "rois"
10 | input_shape {
11 | dim: 300
12 | dim: 5
13 | }
14 |
15 | #============== RCNN ===============
16 | layer {
17 | name: "roi_pool"
18 | type: "ROIPooling"
19 | bottom: "res48_scale"
20 | bottom: "rois"
21 | top: "roi_pool"
22 | roi_pooling_param {
23 | pooled_w: 14
24 | pooled_h: 14
25 | spatial_scale: 0.062500
26 | }
27 | }
28 | layer {
29 | name: "res48_conv1"
30 | type: "Convolution"
31 | bottom: "roi_pool"
32 | top: "res48_conv1"
33 | param {
34 | lr_mult: 1
35 | decay_mult: 1
36 | }
37 | convolution_param {
38 | bias_term: false
39 | num_output: 512
40 | pad: 0
41 | kernel_size: 1
42 | stride: 1
43 | }
44 | }
45 |
46 | layer {
47 | name: "res48_conv1_scale"
48 | type: "Scale"
49 | bottom: "res48_conv1"
50 | top: "res48_conv1"
51 | scale_param {
52 | bias_term: true
53 | }
54 | param {
55 | lr_mult: 0.0
56 | decay_mult: 0.0
57 | }
58 | param {
59 | lr_mult: 0.0
60 | decay_mult: 0.0
61 | }
62 | }
63 | layer {
64 | name: "res48_conv1_relu"
65 | type: "ReLU"
66 | bottom: "res48_conv1"
67 | top: "res48_conv1"
68 | }
69 | layer {
70 | name: "res48_conv2"
71 | type: "Convolution"
72 | bottom: "res48_conv1"
73 | top: "res48_conv2"
74 | param {
75 | lr_mult: 1
76 | decay_mult: 1
77 | }
78 | convolution_param {
79 | bias_term: false
80 | num_output: 512
81 | pad: 1
82 | kernel_size: 3
83 | stride: 2
84 | }
85 | }
86 |
87 | layer {
88 | name: "res48_conv2_scale"
89 | type: "Scale"
90 | bottom: "res48_conv2"
91 | top: "res48_conv2"
92 | scale_param {
93 | bias_term: true
94 | }
95 | param {
96 | lr_mult: 0.0
97 | decay_mult: 0.0
98 | }
99 | param {
100 | lr_mult: 0.0
101 | decay_mult: 0.0
102 | }
103 | }
104 | layer {
105 | name: "res48_conv2_relu"
106 | type: "ReLU"
107 | bottom: "res48_conv2"
108 | top: "res48_conv2"
109 | }
110 | layer {
111 | name: "res48_conv3"
112 | type: "Convolution"
113 | bottom: "res48_conv2"
114 | top: "res48_conv3"
115 | param {
116 | lr_mult: 1
117 | decay_mult: 1
118 | }
119 | convolution_param {
120 | bias_term: false
121 | num_output: 2048
122 | pad: 0
123 | kernel_size: 1
124 | stride: 1
125 | }
126 | }
127 | layer {
128 | name: "res48_match_conv"
129 | type: "Convolution"
130 | bottom: "roi_pool"
131 | top: "res48_match_conv"
132 | param {
133 | lr_mult: 1
134 | decay_mult: 1
135 | }
136 | convolution_param {
137 | bias_term: false
138 | num_output: 2048
139 | pad: 0
140 | kernel_size: 1
141 | stride: 2
146 | }
147 | }
148 | layer {
149 | name: "res48_eletwise"
150 | type: "Eltwise"
151 | bottom: "res48_match_conv"
152 | bottom: "res48_conv3"
153 | top: "res48_eletwise"
154 | eltwise_param {
155 | operation: SUM
156 | }
157 | }
158 |
159 | layer {
160 | name: "res49_scale"
161 | type: "Scale"
162 | bottom: "res48_eletwise"
163 | top: "res49_scale"
164 | scale_param {
165 | bias_term: true
166 | }
167 | param {
168 | lr_mult: 0.0
169 | decay_mult: 0.0
170 | }
171 | param {
172 | lr_mult: 0.0
173 | decay_mult: 0.0
174 | }
175 | }
176 | layer {
177 | name: "res49_relu"
178 | type: "ReLU"
179 | bottom: "res49_scale"
180 | top: "res49_scale"
181 | }
182 | layer {
183 | name: "res49_conv1"
184 | type: "Convolution"
185 | bottom: "res49_scale"
186 | top: "res49_conv1"
187 | param {
188 | lr_mult: 1
189 | decay_mult: 1
190 | }
191 | convolution_param {
192 | bias_term: false
193 | num_output: 512
194 | pad: 0
195 | kernel_size: 1
196 | stride: 1
197 | }
198 | }
199 |
200 | layer {
201 | name: "res49_conv1_scale"
202 | type: "Scale"
203 | bottom: "res49_conv1"
204 | top: "res49_conv1"
205 | scale_param {
206 | bias_term: true
207 | }
208 | param {
209 | lr_mult: 0.0
210 | decay_mult: 0.0
211 | }
212 | param {
213 | lr_mult: 0.0
214 | decay_mult: 0.0
215 | }
216 | }
217 | layer {
218 | name: "res49_conv1_relu"
219 | type: "ReLU"
220 | bottom: "res49_conv1"
221 | top: "res49_conv1"
222 | }
223 | layer {
224 | name: "res49_conv2"
225 | type: "Convolution"
226 | bottom: "res49_conv1"
227 | top: "res49_conv2"
228 | param {
229 | lr_mult: 1
230 | decay_mult: 1
231 | }
232 | convolution_param {
233 | bias_term: false
234 | num_output: 512
235 | pad: 1
236 | kernel_size: 3
237 | stride: 1
238 | }
239 | }
240 | layer {
241 | name: "res49_conv2_scale"
242 | type: "Scale"
243 | bottom: "res49_conv2"
244 | top: "res49_conv2"
245 | scale_param {
246 | bias_term: true
247 | }
248 | param {
249 | lr_mult: 0.0
250 | decay_mult: 0.0
251 | }
252 | param {
253 | lr_mult: 0.0
254 | decay_mult: 0.0
255 | }
256 | }
257 | layer {
258 | name: "res49_conv2_relu"
259 | type: "ReLU"
260 | bottom: "res49_conv2"
261 | top: "res49_conv2"
262 | }
263 | layer {
264 | name: "res49_conv3"
265 | type: "Convolution"
266 | bottom: "res49_conv2"
267 | top: "res49_conv3"
268 | param {
269 | lr_mult: 1
270 | decay_mult: 1
271 | }
272 | convolution_param {
273 | bias_term: false
274 | num_output: 2048
275 | pad: 0
276 | kernel_size: 1
277 | stride: 1
278 | }
279 | }
280 | layer {
281 | name: "res49_eletwise"
282 | type: "Eltwise"
283 | bottom: "res48_eletwise"
284 | bottom: "res49_conv3"
285 | top: "res49_eletwise"
286 | eltwise_param {
287 | operation: SUM
288 | }
289 | }
290 |
291 | layer {
292 | name: "res50_scale"
293 | type: "Scale"
294 | bottom: "res49_eletwise"
295 | top: "res50_scale"
296 | scale_param {
297 | bias_term: true
298 | }
299 | param {
300 | lr_mult: 0.0
301 | decay_mult: 0.0
302 | }
303 | param {
304 | lr_mult: 0.0
305 | decay_mult: 0.0
306 | }
307 | }
308 | layer {
309 | name: "res50_relu"
310 | type: "ReLU"
311 | bottom: "res50_scale"
312 | top: "res50_scale"
313 | }
314 | layer {
315 | name: "res50_conv1"
316 | type: "Convolution"
317 | bottom: "res50_scale"
318 | top: "res50_conv1"
319 | param {
320 | lr_mult: 1
321 | decay_mult: 1
322 | }
323 | convolution_param {
324 | bias_term: false
325 | num_output: 512
326 | pad: 0
327 | kernel_size: 1
328 | stride: 1
329 | }
330 | }
331 |
332 | layer {
333 | name: "res50_conv1_scale"
334 | type: "Scale"
335 | bottom: "res50_conv1"
336 | top: "res50_conv1"
337 | scale_param {
338 | bias_term: true
339 | }
340 | param {
341 | lr_mult: 0.0
342 | decay_mult: 0.0
343 | }
344 | param {
345 | lr_mult: 0.0
346 | decay_mult: 0.0
347 | }
348 | }
349 | layer {
350 | name: "res50_conv1_relu"
351 | type: "ReLU"
352 | bottom: "res50_conv1"
353 | top: "res50_conv1"
354 | }
355 | layer {
356 | name: "res50_conv2"
357 | type: "Convolution"
358 | bottom: "res50_conv1"
359 | top: "res50_conv2"
360 | param {
361 | lr_mult: 1
362 | decay_mult: 1
363 | }
364 | convolution_param {
365 | bias_term: false
366 | num_output: 512
367 | pad: 1
368 | kernel_size: 3
369 | stride: 1
370 | }
371 | }
372 |
373 | layer {
374 | name: "res50_conv2_scale"
375 | type: "Scale"
376 | bottom: "res50_conv2"
377 | top: "res50_conv2"
378 | scale_param {
379 | bias_term: true
380 | }
381 | param {
382 | lr_mult: 0.0
383 | decay_mult: 0.0
384 | }
385 | param {
386 | lr_mult: 0.0
387 | decay_mult: 0.0
388 | }
389 | }
390 | layer {
391 | name: "res50_conv2_relu"
392 | type: "ReLU"
393 | bottom: "res50_conv2"
394 | top: "res50_conv2"
395 | }
396 | layer {
397 | name: "res50_conv3"
398 | type: "Convolution"
399 | bottom: "res50_conv2"
400 | top: "res50_conv3"
401 | param {
402 | lr_mult: 1
403 | decay_mult: 1
404 | }
405 | convolution_param {
406 | bias_term: false
407 | num_output: 2048
408 | pad: 0
409 | kernel_size: 1
410 | stride: 1
411 | }
412 | }
413 | layer {
414 | name: "res50_eletwise"
415 | type: "Eltwise"
416 | bottom: "res49_eletwise"
417 | bottom: "res50_conv3"
418 | top: "res50_eletwise"
419 | eltwise_param {
420 | operation: SUM
421 | }
422 | }
423 |
424 | layer {
425 | name: "res50_eletwise_scale"
426 | type: "Scale"
427 | bottom: "res50_eletwise"
428 | top: "res50_eletwise_scale"
429 | scale_param {
430 | bias_term: true
431 | }
432 | param {
433 | lr_mult: 0.0
434 | decay_mult: 0.0
435 | }
436 | param {
437 | lr_mult: 0.0
438 | decay_mult: 0.0
439 | }
440 | }
441 | layer {
442 | name: "res50_eletwise_relu"
443 | type: "ReLU"
444 | bottom: "res50_eletwise_scale"
445 | top: "res50_eletwise_scale"
446 | }
447 | layer {
448 | name: "pool5"
449 | type: "Pooling"
450 | bottom: "res50_eletwise_scale"
451 | top: "pool5"
452 | pooling_param {
453 | pool: AVE
454 | global_pooling: true
455 | }
456 | }
457 | layer {
458 | name: "cls_score"
459 | type: "InnerProduct"
460 | bottom: "pool5"
461 | top: "cls_score"
462 | param {
463 | lr_mult: 1
464 | decay_mult: 1
465 | }
466 | param {
467 | lr_mult: 2
468 | decay_mult: 0
469 | }
470 | inner_product_param {
471 | num_output: 21
472 | weight_filler {
473 | type: "msra"
474 | std: 0.01
475 | }
476 | bias_filler {
477 | type: "constant"
478 | value: 0
479 | }
480 | }
481 | }
482 | layer {
483 | name: "bbox_pred"
484 | type: "InnerProduct"
485 | bottom: "pool5"
486 | top: "bbox_pred"
487 | param {
488 | lr_mult: 1
489 | decay_mult: 1
490 | }
491 | param {
492 | lr_mult: 2
493 | decay_mult: 0
494 | }
495 | inner_product_param {
496 | num_output: 84
497 | weight_filler {
498 | type: "msra"
499 | std: 0.01
500 | }
501 | bias_filler {
502 | type: "constant"
503 | value: 0
504 | }
505 | }
506 | }
507 | layer {
508 | name: "cls_prob"
509 | type: "Softmax"
510 | bottom: "cls_score"
511 | top: "cls_prob"
512 | }
513 |
514 |
--------------------------------------------------------------------------------