├── cls ├── accuracy.png ├── inception │ └── README.md ├── resnet-v2 │ └── README.md ├── cls_lite │ └── README.md ├── vgg │ ├── deploy_vgg13-pytorch.prototxt │ ├── deploy_vgg16-5x.prototxt │ ├── deploy_vgg16-dsd.prototxt │ ├── deploy_vgg16-pytorch.prototxt │ ├── deploy_vgg16-tf.prototxt │ ├── deploy_vgg19-pytorch.prototxt │ └── deploy_vgg13bn-pytorch.prototxt ├── evaluation_cls.py └── README.md ├── .gitmodules ├── det ├── rfcn │ ├── README.md │ ├── models │ │ ├── pascal_voc │ │ │ ├── solver.prototxt │ │ │ ├── air101 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_air101-merge.prototxt │ │ │ ├── resnet101-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt │ │ │ ├── resnet18 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt │ │ │ ├── inception-v4 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt │ │ │ ├── resnext101-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt │ │ │ ├── resnext101-64x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt │ │ │ ├── se-inception-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt │ │ │ └── resnext26-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt │ │ └── coco │ │ │ ├── air101 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_coco_air101-merge.prototxt │ │ │ ├── inception-v4 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt │ │ │ └── resnext101-32x4d │ │ │ └── rpn_rcnn_deploys │ │ │ └── rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt │ ├── experiments │ │ └── cfgs │ │ │ ├── rfcn_end2end.yml │ │ │ └── rfcn_end2end_ohem.yml │ └── tools │ │ ├── train_net_multi_gpu.py │ │ └── score.py ├── faster_rcnn │ ├── models │ │ ├── pascal_voc │ │ │ ├── solver.prototxt │ │ │ ├── airx101-32x4d │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt │ │ │ ├── 2007test400.txt │ │ │ ├── resnet18 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt │ │ │ ├── mobilenet │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_mobilenet-dw.prototxt │ │ │ ├── xception │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt │ │ │ ├── resnet38a │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet38a-merge.prototxt │ │ │ ├── resnet101-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ │ └── rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt │ │ │ └── resnet152-v2 │ │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt │ │ └── coco │ │ │ ├── air101 │ │ │ └── rpn_rcnn_deploys │ │ │ │ └── rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt │ │ │ └── inception-v4 │ │ │ └── rpn_rcnn_deploys │ │ │ └── rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt │ ├── experiments │ │ └── cfgs │ │ │ ├── faster_rcnn_end2end.yml │ │ │ └── faster_rcnn_end2end_ohem.yml │ ├── README.md │ └── tools │ │ ├── train_net_multi_gpu.py │ │ └── score.py ├── MSCOCO_Benchmark.md ├── README.md └── VOC_Benchmark.md ├── LICENSE ├── seg ├── pspnet │ └── tools │ │ ├── train_net_multi.py │ │ └── image_seg_data.py ├── score_seg.py ├── README.md └── evaluation_seg.py └── README.md /cls/accuracy.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/soeaver/caffe-model/HEAD/cls/accuracy.png
--------------------------------------------------------------------------------
/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pypriv"] 2 | path = pypriv 3 | url = https://github.com/soeaver/pypriv.git 4 |
--------------------------------------------------------------------------------
/det/rfcn/README.md: -------------------------------------------------------------------------------- 1 | ## RFCN 2 | ### Training R-FCN networks on PASCAL VOC 3 | 4 | 1. Download the network weights pre-trained on ImageNet. 5 |
--------------------------------------------------------------------------------
/cls/inception/README.md: -------------------------------------------------------------------------------- 1 | ### Note 2 | We adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files) in deploy_xception-dw.prototxt to speed up inference. 3 |
--------------------------------------------------------------------------------
/det/rfcn/models/pascal_voc/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "./rfcn_voc_resnet101-v2-merge.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "multistep" 4 | gamma: 0.1 5 | # stepsize: 30000 6 | stepvalue: 80000 7 | # stepvalue: 70000 8 | display: 20 9 | average_loss: 100 10 | # iter_size: 1 11 | momentum: 0.9 12 | weight_decay: 0.0001 13 | # We disable standard caffe solver snapshotting and implement our own snapshot 14 | # function 15 | snapshot: 0 16 | # We still use the snapshot prefix, though 17 | snapshot_prefix: "rfcn_voc_resnet101-v2" 18 | iter_size: 1 19 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/pascal_voc/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2/faster_voc_resnet101-v2-merge.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "multistep" 4 | gamma: 0.1 5 | stepvalue: 50000 6 | display: 20 7 | average_loss: 100 8 | 9 | momentum: 0.9 10 | weight_decay: 0.0001 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "faster_voc_resnet101-v2" 16 | iter_size: 1 17 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2 2 | EXP_DIR: faster_rcnn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_BATCHSIZE: 256 16 | BATCH_SIZE: 128 17 | PROPOSAL_METHOD: gt 18 | BG_THRESH_LO: 0.0 19 | ASPECT_GROUPING: True 20 | TEST: 21 | HAS_RPN: True 22 | SCALES: [600] 23 | MAX_SIZE: 1000 24 | 25 |
--------------------------------------------------------------------------------
/cls/resnet-v2/README.md:
--------------------------------------------------------------------------------
1 | ### Resnet-v2 2 | 3 | At present, we have not finished the generator scripts for resnet-v2; [ResNet_with_IdentityMapping](https://github.com/MichaelHunson/ResNet_with_IdentityMapping) may be useful in the meantime. 4 | 5 | The details are described in the paper **Identity Mappings in Deep Residual Networks** (https://arxiv.org/abs/1603.05027). 6 | 7 | The caffe models are converted from **craftGBD** (https://github.com/craftGBD/craftGBD). 8 | The craftGBD models use a modified BN layer, so we manually converted the custom 'bn_layer' into the official 'batch_norm_layer' plus 'scale_layer'. 9 | 10 | ### Notes 11 | - We thank **craftGBD** (https://github.com/craftGBD/craftGBD) for training the models. 12 | - The layer naming differs somewhat from the craftGBD version. 13 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss 2 | EXP_DIR: rfcn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [200, 400, 600, 800] 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | # ONLY_INSIDE_ANCHORS: False 16 | RPN_BATCHSIZE: 256 17 | BATCH_SIZE: 128 18 | PROPOSAL_METHOD: gt 19 | BG_THRESH_LO: 0.1 20 | AGNOSTIC: True 21 | RPN_PRE_NMS_TOP_N: 6000 22 | RPN_POST_NMS_TOP_N: 300 23 | TEST: 24 | HAS_RPN: True 25 | SCALES: [600] 26 | MAX_SIZE: 1000 27 | 28 |
--------------------------------------------------------------------------------
/det/rfcn/experiments/cfgs/rfcn_end2end_ohem.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/rfcn/models/pascal_voc/resnet101-v2/ss-ohem 2 | EXP_DIR: rfcn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss-ohem' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [200, 400, 600, 800] 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_NORMALIZE_TARGETS: True 16 | # ONLY_INSIDE_ANCHORS: False 17 | RPN_BATCHSIZE: 256 18 | BATCH_SIZE: -1 19 | PROPOSAL_METHOD: gt 20 | BG_THRESH_LO: 0.0 21 | AGNOSTIC: True 22 | RPN_PRE_NMS_TOP_N: 6000 23 | RPN_POST_NMS_TOP_N: 300 24 | TEST: 25 | HAS_RPN: True 26 | SCALES: [600] 27 | MAX_SIZE: 1000 28 | 29 |
--------------------------------------------------------------------------------
/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end_ohem.yml: -------------------------------------------------------------------------------- 1 | ROOT_DIR: ~/caffe-model/det/faster_rcnn/models/pascal_voc/resnet101-v2 2 | EXP_DIR: faster_rcnn_end2end 3 | PIXEL_MEANS: [[[102.98, 115.947, 122.772]]] 4 | PIXEL_STDS: [[[1.0, 1.0, 1.0]]] 5 | TRAIN: 6 | SNAPSHOT_INFIX: 'ss-ohem' 7 | SNAPSHOT_ITERS: 10000 8 | # SCALES: [400, 600, 800, 1000, 1200] # for multi-scale training 9 | SCALES: [600] 10 | MAX_SIZE: 1000 11 | HAS_RPN: True 12 | IMS_PER_BATCH: 1 13 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 14 | RPN_POSITIVE_OVERLAP: 0.7 15 | RPN_NORMALIZE_TARGETS: True 16 | # ONLY_INSIDE_ANCHORS: False 17 | RPN_BATCHSIZE: 256 18 |
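  # Note (added annotation): with OHEM the R-CNN minibatch is not subsampled;
  # BATCH_SIZE is set to -1 below so the loss is evaluated for all proposals and
  # only the hardest examples are backpropagated (behavior assumed from py-R-FCN's
  # OHEM implementation; compare BATCH_SIZE: 128 in the non-OHEM cfg above).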
BATCH_SIZE: -1 19 | RPN_PRE_NMS_TOP_N: 6000 20 | RPN_POST_NMS_TOP_N: 300 21 | PROPOSAL_METHOD: gt 22 | BG_THRESH_LO: 0.0 23 | TEST: 24 | HAS_RPN: True 25 | SCALES: [600] 26 | MAX_SIZE: 1000 27 | 28 |
--------------------------------------------------------------------------------
/det/faster_rcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Faster RCNN 2 | ### Training Faster R-CNN networks on PASCAL VOC 3 | 4 | 1. Download the network weights pre-trained on ImageNet. 5 | 6 | 7 | 2. Modify the solver file 8 | ``` 9 | caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt 10 | ``` 11 | - You need to modify 'train_net' and 'snapshot_prefix' to the correct path and name. 12 | 13 | 14 | 3. Modify the yml file 15 | ``` 16 | caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml 17 | ``` 18 | - The trained Faster R-CNN models will be saved in the '{ROOT_DIR}/output/{EXP_DIR}/{imdb.name}/' folder. 19 | 20 | 21 | 4. Training 22 | ``` 23 | python train_net_multi_gpu.py --gpu_id 0,1 --solver ~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt --iters 80000 --weights ~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel --cfg ~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml --imdb voc_0712_trainval 24 | ``` 25 |
--------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 soeaver Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_air101-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/air101/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_air101-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet101-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 
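# Note (added annotation): rfcn_cls holds R-FCN position-sensitive class score maps;
# its 1029 channels = 21 VOC classes (20 + background) x 49 bins (7x7 group_size).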
2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnet18-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_inception-v4-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | 
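# Note (added annotation): for the 81 COCO classes, rfcn_cls above has
# 81 x 49 = 3969 channels; rfcn_bbox has 8 x 49 = 392 channels
# (class-agnostic regression: 2 classes x 4 box coordinates).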
#======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/coco/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_coco_resnext101-32x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 3969 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 81 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/inception-v4/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_inception-v4-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | 
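    # Note (added annotation): spatial_scale = 1/16 maps RoI image coordinates
    # onto the stride-16 feature map before pooling; output_dim below equals
    # the class count (20 VOC classes + background = 21).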
output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext101-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-32x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext101-64x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext101-64x4d-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | 
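# Note (added annotation): the localization branch below mirrors the classification
# branch but pools rfcn_bbox with output_dim = 8 (4 box offsets x 2 classes),
# matching the class-agnostic setting AGNOSTIC: True in the training cfgs.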
layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/se-inception-v2/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_se-inception-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /det/rfcn/models/pascal_voc/resnext26-32x4d/rpn_rcnn_deploys/rcnn_deploy_rfcn_voc_resnext26-32x4d-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "rfcn_cls" 2 | input_shape { 3 | dim: 1 4 | dim: 1029 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rfcn_bbox" 10 | input_shape { 11 | dim: 1 12 | dim: 392 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "rois" 18 | input_shape { 19 | dim: 300 20 | dim: 5 21 | } 22 | 23 | 24 | #======= position sensitive RoI pooling ======== 25 | layer { 26 | bottom: "rfcn_cls" 27 | bottom: "rois" 28 | top: "psroipooled_cls_rois" 29 | name: "psroipooled_cls_rois" 30 | type: "PSROIPooling" 31 | psroi_pooling_param { 32 | spatial_scale: 0.0625 33 | output_dim: 21 34 | group_size: 7 35 | } 36 | } 37 | layer { 38 | bottom: "psroipooled_cls_rois" 39 | top: "cls_score" 40 | name: "ave_cls_score_rois" 41 | type: "Pooling" 42 | pooling_param { 43 | pool: AVE 44 | global_pooling: true 45 | } 46 | } 47 | layer { 48 | bottom: "rfcn_bbox" 49 | bottom: "rois" 50 | top: "psroipooled_loc_rois" 51 | name: "psroipooled_loc_rois" 52 | type: "PSROIPooling" 53 | psroi_pooling_param { 54 | spatial_scale: 0.0625 55 | output_dim: 8 56 | group_size: 7 57 | } 58 | } 
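# Note (added annotation): the global average pooling below collapses each RoI's
# 7x7 position-sensitive map into a single vote, producing the final bbox_pred
# regression vector.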
59 | layer { 60 | bottom: "psroipooled_loc_rois" 61 | top: "bbox_pred" 62 | name: "ave_bbox_pred_rois" 63 | type: "Pooling" 64 | pooling_param { 65 | pool: AVE 66 | global_pooling: true 67 | } 68 | } 69 | layer { 70 | name: "cls_prob" 71 | type: "Softmax" 72 | bottom: "cls_score" 73 | top: "cls_prob" 74 | } 75 | 76 | -------------------------------------------------------------------------------- /seg/pspnet/tools/train_net_multi.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from multiprocessing import Process 3 | 4 | # sys.setrecursionlimit(100000) 5 | 6 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python') 7 | # sys.path.append('/home/yanglu/workspace/py-R-FCN-multiGPU-master-0619/caffe/python') 8 | import caffe 9 | 10 | # _snapshot='./aug_single_resnet101_iter_5000.solverstate' 11 | _weights = '/home/prmct/Program/classification/ilsvrc/resnet_v2/resnet101_v2/resnet101_v2_merge_bn_scale.caffemodel' 12 | 13 | solver_prototxt = './solver.prototxt' 14 | gpus = [0,1,2,3] 15 | max_iter = 200000 16 | 17 | def solve(proto, gpus, uid, rank, max_iter): 18 | caffe.set_mode_gpu() 19 | caffe.set_device(gpus[rank]) 20 | caffe.set_solver_count(len(gpus)) 21 | caffe.set_solver_rank(rank) 22 | caffe.set_multiprocess(True) 23 | 24 | solver = caffe.SGDSolver(proto) 25 | if rank == 0: 26 | # solver.restore(_snapshot) 27 | solver.net.copy_from(_weights) 28 | 29 | solver.net.layers[0].get_gpu_id(gpus[rank]) 30 | 31 | nccl = caffe.NCCL(solver, uid) 32 | nccl.bcast() 33 | solver.add_callback(nccl) 34 | 35 | if solver.param.layer_wise_reduce: 36 | solver.net.after_backward(nccl) 37 | 38 | for _ in range(max_iter): 39 | solver.step(1) 40 | 41 | 42 | if __name__ == '__main__': 43 | uid = caffe.NCCL.new_uid() 44 | caffe.init_log() 45 | caffe.log('Using devices %s' % str(gpus)) 46 | procs = [] 47 | 48 | for rank in range(len(gpus)): 49 | p = Process(target=solve, 50 | args=(solver_prototxt, gpus, uid, rank, max_iter)) 51 | p.daemon = False 52 | p.start() 53 | procs.append(p) 54 | for p in procs: 55 | p.join() 56 | 57 | -------------------------------------------------------------------------------- /seg/score_seg.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | gt_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/SegmentationClassAug/' 5 | pre_root = './predict/' 6 | val_pth = './val.txt' 7 | n_class = 21 8 | 9 | 10 | def fast_hist(a, b, n): 11 | k = (a >= 0) & (a < n) 12 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) 13 | 14 | 15 | def compute_hist(val_list): 16 | hist = np.zeros((n_class, n_class)) 17 | for idx in val_list: 18 | print idx 19 | label = cv2.imread(gt_root + idx + '.png', 0) 20 | gt = label.flatten() 21 | tmp = cv2.imread(pre_root + idx + '.png', 0) 22 | 23 | if label.shape != tmp.shape: 24 | pre = cv2.resize(tmp, (label.shape[1], label.shape[0]), interpolation=cv2.cv.CV_INTER_NN) 25 | pre = pre.flatten() 26 | else: 27 | pre = tmp.flatten() 28 | 29 | hist += fast_hist(gt, pre, n_class) 30 | 31 | # return hist[1:, 1:] 32 | return hist 33 | 34 | 35 | def mean_IoU(overall_h): 36 | iu = np.diag(overall_h) / (overall_h.sum(1) + overall_h.sum(0) - np.diag(overall_h)) 37 | return np.nanmean(iu) 38 | 39 | 40 | def per_class_acc(overall_h): 41 | acc = np.diag(overall_h) / overall_h.sum(1) 42 | return np.nanmean(acc) 43 | 44 | 45 | def pixel_wise_acc(overall_h): 46 | return np.diag(overall_h).sum() / 
overall_h.sum() 47 | 48 | 49 | if __name__ == '__main__': 50 | val_list = [] 51 | 52 | f = open(val_pth, 'r') 53 | for i in f: 54 | val_list.append(i.strip().split(' ')[-1].split('/')[-1]) 55 | 56 | hist = compute_hist(val_list) 57 | 58 | print 'Mean IoU:', mean_IoU(hist) 59 | print 'Pixel Acc:', pixel_wise_acc(hist) 60 | print 'Mean Acc:', per_class_acc(hist) 61 | 62 | # print np.diag(hist).sum() / hist.sum() 63 |
--------------------------------------------------------------------------------
/seg/README.md: -------------------------------------------------------------------------------- 1 | ## Object Segmentation 2 | 3 | ### We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 4 | We are releasing the training code and files; the models and more experiments will come soon. 5 | 6 | ### Object Segmentation Performance on PASCAL VOC. 7 | **1. PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).** 8 | 9 | Network|mIoU(%)|pixel acc(%)|training<br/>speed|training<br/>memory|testing<br/>speed|testing<br/>memory 10 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 11 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB 12 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB 13 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB 14 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB 15 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB 16 | se-resnet50| 75.80 | 94.30 | -- img/s | --MB | -- img/s | --MB 17 | - To reduce memory usage, we merge all the models' batch-norm layer parameters into the scale layer; for details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 18 | - The PSP module is used without batch normalization; the kernel_size of its average pooling is 64, 32, 16 and 8 respectively; 19 | - All the models use 513x513 input with random crop, multi-scale training (0.75x, 1.0x, 1.25x, 1.5x, 2.0x) and horizontal flipping; 20 | - The training and testing speed is measured on a single NVIDIA Titan Pascal GPU with batch_size=1; 21 | - Training uses batch_size=16 for 20,000 iterations, base_lr=0.001 with the 'poly' learning rate policy (power=0.9); 22 | - Testing uses a single scale, base_size=555 and crop_size=513, no flipping, no CRF; 23 |
--------------------------------------------------------------------------------
/det/faster_rcnn/models/coco/air101/rpn_rcnn_deploys/rcnn_deploy_faster_voc_air101-merge-fc2-multigrid.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv_new_1" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | bottom: "conv_new_1" 18 | bottom: "rois" 19 | top: "roi_pool" 20 | name: "roi_pool" 21 | type: "ROIPooling" 22 | roi_pooling_param { 23 | pooled_w: 7 24 | pooled_h: 7 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "fc1" 30 | type: "InnerProduct" 31 | bottom: "roi_pool" 32 | top: "fc1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 2 39 | decay_mult: 0 40 | } 41 | inner_product_param { 42 | num_output: 1024 43 | weight_filler { 44 | type: "gaussian" 45 | std: 0.01 46 | } 47 | bias_filler { 48 | type: "constant" 49 | value: 0 50 | } 51 | } 52 | } 53 | layer { 54 | name: "fc1_relu" 55 | type: "ReLU" 56 | bottom: "fc1" 57 | top: "fc1" 58 | } 59 | layer { 60 | name: "fc2" 61 | type: "InnerProduct" 62 | bottom: "fc1" 63 | top: "fc2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 2 70 | decay_mult: 0 71 | } 72 | inner_product_param { 73 | num_output: 1024 74 | weight_filler { 75 | type: "gaussian" 76 | std: 0.01 77 | } 78 | bias_filler { 79 | type: "constant" 80 | value: 0 81 | } 82 | } 83 | } 84 | layer { 85 | name: "fc2_relu" 86 | type: "ReLU" 87 | bottom: "fc2" 88 | top: "fc2" 89 | } 90 | layer { 91 | name: "cls_score" 92 | type: "InnerProduct" 93 | bottom: "fc2" 94 | top: "cls_score" 95 | param { 96 | lr_mult: 1 97 | decay_mult: 1 98 | } 99 | param { 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | inner_product_param { 104 | num_output: 81 105 | weight_filler { 106 | type: "msra" 107 | std: 0.01 108 | } 109 | bias_filler { 110 | type: "constant" 111 | value: 0 112 | } 113 | } 114 | } 115 | layer { 116 | name: "bbox_pred" 117 | type: "InnerProduct"
118 | bottom: "fc2" 119 | top: "bbox_pred" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | inner_product_param { 129 | num_output: 324 130 | weight_filler { 131 | type: "msra" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | value: 0 137 | } 138 | } 139 | } 140 | layer { 141 | name: "cls_prob" 142 | type: "Softmax" 143 | bottom: "cls_score" 144 | top: "cls_prob" 145 | } 146 | 147 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/airx101-32x4d/rpn_rcnn_deploys/rcnn_deploy_faster_voc_airx101-32x4d-merge-fc2-ohem-multigrid.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv_new_1" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | bottom: "conv_new_1" 18 | bottom: "rois" 19 | top: "roi_pool" 20 | name: "roi_pool" 21 | type: "ROIPooling" 22 | roi_pooling_param { 23 | pooled_w: 7 24 | pooled_h: 7 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "fc1" 30 | type: "InnerProduct" 31 | bottom: "roi_pool" 32 | top: "fc1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 2 39 | decay_mult: 0 40 | } 41 | inner_product_param { 42 | num_output: 1024 43 | weight_filler { 44 | type: "gaussian" 45 | std: 0.01 46 | } 47 | bias_filler { 48 | type: "constant" 49 | value: 0 50 | } 51 | } 52 | } 53 | layer { 54 | name: "fc1_relu" 55 | type: "ReLU" 56 | bottom: "fc1" 57 | top: "fc1" 58 | } 59 | layer { 60 | name: "fc2" 61 | type: "InnerProduct" 62 | bottom: "fc1" 63 | top: "fc2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 2 70 | decay_mult: 0 71 | } 72 | inner_product_param { 73 | num_output: 1024 74 | weight_filler { 75 | type: "gaussian" 76 | std: 0.01 77 | } 78 | bias_filler { 79 | type: "constant" 80 | value: 0 81 | } 82 | } 83 | } 84 | layer { 85 | name: "fc2_relu" 86 | type: "ReLU" 87 | bottom: "fc2" 88 | top: "fc2" 89 | } 90 | layer { 91 | name: "cls_score" 92 | type: "InnerProduct" 93 | bottom: "fc2" 94 | top: "cls_score" 95 | param { 96 | lr_mult: 1 97 | decay_mult: 1 98 | } 99 | param { 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | inner_product_param { 104 | num_output: 21 105 | weight_filler { 106 | type: "msra" 107 | std: 0.01 108 | } 109 | bias_filler { 110 | type: "constant" 111 | value: 0 112 | } 113 | } 114 | } 115 | layer { 116 | name: "bbox_pred" 117 | type: "InnerProduct" 118 | bottom: "fc2" 119 | top: "bbox_pred" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | inner_product_param { 129 | num_output: 84 130 | weight_filler { 131 | type: "msra" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | value: 0 137 | } 138 | } 139 | } 140 | layer { 141 | name: "cls_prob" 142 | type: "Softmax" 143 | bottom: "cls_score" 144 | top: "cls_prob" 145 | } 146 | 147 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/coco/inception-v4/rpn_rcnn_deploys/rcnn_deploy_faster_coco_inception-v4-merge-aligned-fpn.prototxt: -------------------------------------------------------------------------------- 1 | input: "p2_elewise" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 
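# Note (added annotation): p2_elewise/p3_elewise/p4_elewise/p5_conv_1x1 are the FPN
# pyramid levels at strides 4/8/16/32 (hence spatial_scale 0.25/0.125/0.0625/0.03125
# below); each RoI is pooled from all four levels and fused by elementwise SUM.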
9 | input: "p3_elewise" 10 | input_shape { 11 | dim: 1 12 | dim: 256 13 | dim: 40 14 | dim: 40 15 | } 16 | 17 | input: "p4_elewise" 18 | input_shape { 19 | dim: 1 20 | dim: 256 21 | dim: 40 22 | dim: 40 23 | } 24 | 25 | input: "p5_conv_1x1" 26 | input_shape { 27 | dim: 1 28 | dim: 256 29 | dim: 40 30 | dim: 40 31 | } 32 | 33 | input: "rois" 34 | input_shape { 35 | dim: 300 36 | dim: 5 37 | } 38 | 39 | #============== ROI Pooling =============== 40 | layer { 41 | name: "roi_pool4" 42 | type: "ROIPooling" 43 | bottom: "p2_elewise" 44 | bottom: "rois" 45 | top: "roi_pool4" 46 | roi_pooling_param { 47 | pooled_w: 8 48 | pooled_h: 8 49 | spatial_scale: 0.25 50 | } 51 | } 52 | layer { 53 | name: "roi_pool8" 54 | type: "ROIPooling" 55 | bottom: "p3_elewise" 56 | bottom: "rois" 57 | top: "roi_pool8" 58 | roi_pooling_param { 59 | pooled_w: 8 60 | pooled_h: 8 61 | spatial_scale: 0.125 62 | } 63 | } 64 | layer { 65 | name: "roi_pool16" 66 | type: "ROIPooling" 67 | bottom: "p4_elewise" 68 | bottom: "rois" 69 | top: "roi_pool16" 70 | roi_pooling_param { 71 | pooled_w: 8 72 | pooled_h: 8 73 | spatial_scale: 0.0625 74 | } 75 | } 76 | layer { 77 | name: "roi_pool32" 78 | type: "ROIPooling" 79 | bottom: "p5_conv_1x1" 80 | bottom: "rois" 81 | top: "roi_pool32" 82 | roi_pooling_param { 83 | pooled_w: 8 84 | pooled_h: 8 85 | spatial_scale: 0.03125 86 | } 87 | } 88 | layer { 89 | name: "roi_pool" 90 | type: "Eltwise" 91 | bottom: "roi_pool4" 92 | bottom: "roi_pool8" 93 | bottom: "roi_pool16" 94 | bottom: "roi_pool32" 95 | top: "roi_pool" 96 | eltwise_param { 97 | operation: SUM 98 | } 99 | } 100 | layer { 101 | name: "fc1" 102 | type: "InnerProduct" 103 | bottom: "roi_pool" 104 | top: "fc1" 105 | param { 106 | lr_mult: 1 107 | decay_mult: 1 108 | } 109 | param { 110 | lr_mult: 2 111 | decay_mult: 0 112 | } 113 | inner_product_param { 114 | num_output: 1024 115 | weight_filler { 116 | type: "gaussian" 117 | std: 0.01 118 | } 119 | bias_filler { 120 | type: "constant" 121 | value: 0 122 | } 123 | } 124 | } 125 | layer { 126 | name: "fc1_relu" 127 | type: "ReLU" 128 | bottom: "fc1" 129 | top: "fc1" 130 | } 131 | layer { 132 | name: "fc2" 133 | type: "InnerProduct" 134 | bottom: "fc1" 135 | top: "fc2" 136 | param { 137 | lr_mult: 1 138 | decay_mult: 1 139 | } 140 | param { 141 | lr_mult: 2 142 | decay_mult: 0 143 | } 144 | inner_product_param { 145 | num_output: 1024 146 | weight_filler { 147 | type: "gaussian" 148 | std: 0.01 149 | } 150 | bias_filler { 151 | type: "constant" 152 | value: 0 153 | } 154 | } 155 | } 156 | layer { 157 | name: "fc2_relu" 158 | type: "ReLU" 159 | bottom: "fc2" 160 | top: "fc2" 161 | } 162 | layer { 163 | name: "cls_score" 164 | type: "InnerProduct" 165 | bottom: "fc2" 166 | top: "cls_score" 167 | param { 168 | lr_mult: 1 169 | decay_mult: 1 170 | } 171 | param { 172 | lr_mult: 2 173 | decay_mult: 0 174 | } 175 | inner_product_param { 176 | num_output: 81 177 | weight_filler { 178 | type: "msra" 179 | std: 0.01 180 | } 181 | bias_filler { 182 | type: "constant" 183 | value: 0 184 | } 185 | } 186 | } 187 | layer { 188 | name: "bbox_pred" 189 | type: "InnerProduct" 190 | bottom: "fc2" 191 | top: "bbox_pred" 192 | param { 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | lr_mult: 2 198 | decay_mult: 0 199 | } 200 | inner_product_param { 201 | num_output: 324 202 | weight_filler { 203 | type: "msra" 204 | std: 0.01 205 | } 206 | bias_filler { 207 | type: "constant" 208 | value: 0 209 | } 210 | } 211 | } 212 | layer { 213 | name: "cls_prob" 214 | type: "Softmax" 215 | bottom: 
"cls_score" 216 | top: "cls_prob" 217 | } 218 | 219 | 220 | -------------------------------------------------------------------------------- /det/rfcn/tools/train_net_multi_gpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Written by soeaver 5 | # Modified version of py-R-FCN-multiGPU 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network on a region of interest database.""" 9 | 10 | # import _init_paths 11 | import sys 12 | sys.path.append('~/py-RFCN-priv/caffe-priv/python') 13 | sys.path.append('~/py-RFCN-priv/lib') 14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu 15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 16 | from datasets.factory import get_imdb 17 | import datasets.imdb 18 | import caffe 19 | import argparse 20 | import pprint 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument("--gpu_id", type=str, 30 | default='0,1', 31 | help="List of device ids.") 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=80000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='voc_0712_trainval', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | def combined_roidb(imdb_names): 62 | def get_roidb(imdb_name): 63 | imdb = get_imdb(imdb_name) 64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 67 | roidb = get_training_roidb(imdb) 68 | return roidb 69 | 70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 71 | roidb = roidbs[0] 72 | if len(roidbs) > 1: 73 | for r in roidbs[1:]: 74 | roidb.extend(r) 75 | imdb = datasets.imdb.imdb(imdb_names) 76 | else: 77 | imdb = get_imdb(imdb_names) 78 | return imdb, roidb 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | 83 | print('Called with args:') 84 | print(args) 85 | 86 | if args.cfg_file is not None: 87 | cfg_from_file(args.cfg_file) 88 | if args.set_cfgs is not None: 89 | cfg_from_list(args.set_cfgs) 90 | 91 | gpu_id = args.gpu_id 92 | gpu_list = gpu_id.split(',') 93 | gpus = [int(i) for i in gpu_list] 94 | 95 | print('Using config:') 96 | pprint.pprint(cfg) 97 | 98 | 
if not args.randomize: 99 | # fix the random seeds (numpy and caffe) for reproducibility 100 | np.random.seed(cfg.RNG_SEED) 101 | #caffe.set_random_seed(cfg.RNG_SEED) 102 | 103 | # set up caffe 104 | 105 | imdb, roidb = combined_roidb(args.imdb_name) 106 | print '{:d} roidb entries'.format(len(roidb)) 107 | 108 | output_dir = get_output_dir(imdb) 109 | print 'Output will be saved to `{:s}`'.format(output_dir) 110 | 111 | train_net_multi_gpu(args.solver, roidb, output_dir, 112 | pretrained_model=args.pretrained_model, 113 | max_iter=args.max_iters, gpus=gpus) 114 | -------------------------------------------------------------------------------- /det/faster_rcnn/tools/train_net_multi_gpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Written by soeaver 5 | # Modified version of py-R-FCN-multiGPU 6 | # -------------------------------------------------------- 7 | 8 | """Train a Fast R-CNN network on a region of interest database.""" 9 | 10 | # import _init_paths 11 | import sys 12 | sys.path.append('~/py-RFCN-priv/caffe-priv/python') 13 | sys.path.append('~/py-RFCN-priv/lib') 14 | from fast_rcnn.train_multi_gpu import get_training_roidb, train_net_multi_gpu 15 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 16 | from datasets.factory import get_imdb 17 | import datasets.imdb 18 | import caffe 19 | import argparse 20 | import pprint 21 | import numpy as np 22 | 23 | 24 | def parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 29 | parser.add_argument("--gpu_id", type=str, 30 | default='0,1', 31 | help="List of device ids.") 32 | parser.add_argument('--solver', dest='solver', 33 | help='solver prototxt', 34 | default='~/caffe-model/det/faster_rcnn/models/pascal_voc/solver.prototxt', type=str) 35 | parser.add_argument('--iters', dest='max_iters', 36 | help='number of iterations to train', 37 | default=80000, type=int) 38 | parser.add_argument('--weights', dest='pretrained_model', 39 | help='initialize with pretrained model weights', 40 | default='~/caffe-model/cls/ilsvrc/resnet-v2/resnet101-v2/resnet101-v2_merge.caffemodel', type=str) 41 | parser.add_argument('--cfg', dest='cfg_file', 42 | help='optional config file', 43 | default='~/caffe-model/det/faster_rcnn/experiments/cfgs/faster_rcnn_end2end.yml', type=str) 44 | parser.add_argument('--imdb', dest='imdb_name', 45 | help='dataset to train on', 46 | default='voc_0712_trainval', type=str) 47 | parser.add_argument('--rand', dest='randomize', 48 | help='randomize (do not use a fixed seed)', 49 | action='store_true') 50 | parser.add_argument('--set', dest='set_cfgs', 51 | help='set config keys', default=None, 52 | nargs=argparse.REMAINDER) 53 | 54 | if len(sys.argv) == 1: 55 | parser.print_help() 56 | sys.exit(1) 57 | 58 | args = parser.parse_args() 59 | return args 60 | 61 | def combined_roidb(imdb_names): 62 | def get_roidb(imdb_name): 63 | imdb = get_imdb(imdb_name) 64 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 65 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 66 | print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) 67 | roidb = get_training_roidb(imdb) 68 | return roidb 69 | 70 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 71 | roidb = roidbs[0] 72 | if len(roidbs) > 1: 73 | for r in roidbs[1:]: 74 | roidb.extend(r) 75 | imdb = 
datasets.imdb.imdb(imdb_names) 76 | else: 77 | imdb = get_imdb(imdb_names) 78 | return imdb, roidb 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | 83 | print('Called with args:') 84 | print(args) 85 | 86 | if args.cfg_file is not None: 87 | cfg_from_file(args.cfg_file) 88 | if args.set_cfgs is not None: 89 | cfg_from_list(args.set_cfgs) 90 | 91 | gpu_id = args.gpu_id 92 | gpu_list = gpu_id.split(',') 93 | gpus = [int(i) for i in gpu_list] 94 | 95 | print('Using config:') 96 | pprint.pprint(cfg) 97 | 98 | if not args.randomize: 99 | # fix the random seeds (numpy and caffe) for reproducibility 100 | np.random.seed(cfg.RNG_SEED) 101 | #caffe.set_random_seed(cfg.RNG_SEED) 102 | 103 | # set up caffe 104 | 105 | imdb, roidb = combined_roidb(args.imdb_name) 106 | print '{:d} roidb entries'.format(len(roidb)) 107 | 108 | output_dir = get_output_dir(imdb) 109 | print 'Output will be saved to `{:s}`'.format(output_dir) 110 | 111 | train_net_multi_gpu(args.solver, roidb, output_dir, 112 | pretrained_model=args.pretrained_model, 113 | max_iter=args.max_iters, gpus=gpus) 114 | -------------------------------------------------------------------------------- /det/MSCOCO_Benchmark.md: -------------------------------------------------------------------------------- 1 | ## MSCOCO Detection Benchmark 2 | 3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)** 4 | 5 | ### **1. Results training on MSCOCO2017-trainval and testing on test-dev2017.** 6 | 7 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 8 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 9 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4 10 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 36.8 | 59.8 | 38.7 | 19.7 | 39.8 | 49.1 11 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1 12 | **FPN-Faster-inception-v4**
with ms-train | 36.5 | 58.5 | 38.8 | 16.5 | 38.8 | 52.1 13 | **FPN-Faster-inception-v4**
with ms-train & bbox-voting & soft-nms | 38.3 | 61.0 | 40.8 | 20.0 | 41.5 | 51.4 14 | **FPN-Faster-inception-v4**
with ms-train & bbox-voting & soft-nms & flipping & ms-test | 39.5 | 62.5 | 42.3 | 23.3 | 43.2 | 51.0 15 | **RFCN-air101**
with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0 16 | **RFCN-air101**
with extra-7-epochs & ms-train & ohem & multigrid | 38.5 | 60.2 | 41.4 | 18.3 | 42.1 | 53.4 17 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping | 40.4 | 63.5 | 43.5 | 22.6 | 44.4 | 52.0 18 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 41.8 | 65.3 | 45.3 | 26.1 | 45.6 | 52.4 19 | **RFCN-air101**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & assign-ms-test | 42.1 | 64.6 | 45.6 | 25.6 | 44.5 | 54.1 20 | **RFCN-air101**
with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 43.2 | 66.0 | 46.7 | 25.6 | 46.3 | 55.9 21 | **Faster-2fc-air101**
with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5 22 | 23 | - All the models are test on a single scale (600*1000) without any bells and whistles; 24 | 25 | 26 | ### **2. Context Pyramid Attention Network (CPANet) results training on MSCOCO2017-trainval and testing on test-dev2017.** 27 | 28 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 29 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 30 | **CPANet-air101**
with ms-train & ohem & multigrid & 600-scale-test | 40.1 | 62.2 | 43.4 | 19.4 | 44.4 | 55.9 31 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6 32 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6 33 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6 34 | 35 | 36 | ### **3. COCOPerson results training on MSCOCO2017-trainval and testing on test-dev2017.** 37 | 38 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L|mAR@10 39 | :---:|:---:|:---:|:---:|:---:|:---:|:---:|:---: 40 | **RFCN-se-air14-thin-specific**
with ms-train & ohem & multigrid | 21.5 | 48.9 | 16.5 | 12.3 | 27.3 | 30.8 | 28.6 41 | **RFCN-resnet18-specific**
with ms-train & ohem & multigrid | 38.5 | 66.1 | 39.8 | 16.8 | 47.1 | 63.0 | 41.9 42 | **RFCN-se-resnet50-specific**
with 800-scale-train & ohem & multigrid | 39.0 | 64.1 | 41.1 | 13.5 | 48.4 | 66.4 | 43.9 43 | **RFCN-se-resnet50-specific**
with ms-train & ohem & multigrid | 41.9 | 67.7 | 44.3 | 18.6 | 51.0 | 67.9 | 46.0 44 | **RFCN-se-resnet50-specific**
with ms-train & ohem & multigrid & snms & flip & ms-test | 44.6 | 72.8 | 47.3 | 25.3 | 54.4 | 63.3 | 49.8 45 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 42.7 | 72.0 | 44.5 | 21.0 | 51.1 | 66.4 | 45.4 46 | **RFCN-se-inception-v2-specific**
with ms-train & ohem & multigrid | 41.2 | 66.7 | 43.2 | 17.6 | 50.0 | 68.3 | 45.1 47 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid | 42.3 | 71.4 | 44.2 | 19.5 | 50.7 | 67.2 | 44.9 48 | **RFCN-se-inception-v2**
with ms-train & ohem & multigrid & bbox-voting & soft-nms & flipping & ms-test | 48.0 | 79.5 | 50.0 | 28.3 | 55.8 | 67.5 | 50.8 49 | **RFCN-air101**
with ms-train & ohem & multigrid & deformpsroi & bbox-voting & soft-nms & flipping & assign-ms-test | 54.0 | 83.9 | 58.2 | 35.2 | 61.6 | 73.0 | 55.1 50 | **CPANet-air101**
with ms-train & ohem & multigrid & 600-scale-test | 47.7 | 76.4 | 51.1 | 25.3 | 56.8 | 70.6 | 50.2 51 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 53.4 | 82.7 | 58.0 | 33.1 | 61.8 | 73.3 | 55.0 52 | 53 | -------------------------------------------------------------------------------- /cls/cls_lite/README.md: -------------------------------------------------------------------------------- 1 | ## CLS Lite (Classification lite) 2 | 3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 4 | 5 | 6 | ### Performance of lite models on imagenet validation. 7 | **1. Top-1/5 error and forward/backward (F/B) speed on GPU/CPU of lite models in this repository.** 8 | 9 | Network|Top-1/5 error|F/B on GPU|F/B on CPU|Source 10 | :---:|:---:|:---:|:---:|:---: 11 | resnet10-1x32d | 44.78/21.42 | 2.19/2.57ms | 42.84/38.00ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 12 | resnet10-1x48d | -- | 2.55/3.01ms | 83.66/75.97ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 13 | resnet10-1x64d | 35.93/14.59 | 2.93/3.86ms | 134.3/124.8ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 14 | resnet10-1x96d | 30.66/11.13 | 3.42/5.57ms | 220.7/204.9ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 15 | resnet18-1x16d | 51.37/26.35 | 3.03/3.22ms | 25.03/22.63ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 16 | resnet18-1x32d | 38.24/16.02 | 3.53/4.14ms | 69.2/63.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 17 | resnet18-1x48d | 32.55/11.87 | 4.30/4.83ms | 139.1/127.6ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 18 | resnet18-1x64d<br/>
(resnet18-priv) | 29.62/10.38 | 4.48/5.07ms | 213.2/193.3ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 19 | resnet18-1x96d | 26.11/8.31 | 6.16/9.94ms | 443.2/419.0ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 20 | resnet18-1x128d | 24.81/7.61 | 9.75/16.94ms | 729.1/695.4ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 21 | resnext26-32x4d | 25.57/8.12 | 9.68/11.16ms | 331.4/300.2ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 22 | vgg13-pytorch | 31.07/11.13 | 5.70/9.35ms | 1318/1279ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 23 | vgg13bn-pytorch | 29.50/10.18 | 8.35/13.49ms | 1443/1336ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 24 | vgg16-pytorch | 29.14/10.00 | 6.79/11.78ms | 1684/1643ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 25 | vgg16-tf | 29.03/10.12 | 13.04/48.90ms | 1787/1647ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 26 | vgg16-dsd | 27.62/9.02 | 6.81/11.80ms | 1753/1660ms | [dsd](https://github.com/songhan/DSD) 27 | vgg16-5x | 31.67/11.60 | 4.46/7.15ms | 580.5/593.0ms | [channel-pruning](https://github.com/yihui-he/channel-pruning) 28 | vgg16-3c4x | 28.79/9.78 | 7.53/9.77ms | 753.4/772.4ms | [channel-pruning](https://github.com/yihui-he/channel-pruning) 29 | vgg16bn-pytorch | 27.53/8.99 | 9.14/15.83ms | 1783/1695ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 30 | vgg19-pytorch | 28.23/9.60 | 8.03/14.26ms | 2076/2012ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 31 | vgg19bn-pytorch | 26.58/8.45 | 10.75/18.77ms | 2224/2081ms | [vision](https://github.com/pytorch/vision/tree/master/torchvision/models) 32 | inception-v1-tf | 31.37/11.10 | 10.66/7.84ms | 186.2/155.8ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 33 | inception-v2-tf | 27.91/9.40 | 13.93/10.65ms | 286.4/255.0ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 34 | xception | 20.90/5.49 | 15.21/31.65ms | 1262/1253ms | [keras-models](https://github.com/fchollet/deep-learning-models) 35 | mobilenet-v1-1.0 | 29.98/10.52 | 6.16/9.50ms | 169.4/138.1ms | [tf-slim](https://github.com/tensorflow/models/tree/master/research/slim) 36 | air14-1x8d | 56.28/31.25 | 4.28/3.08ms | 21.01/3.29ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 37 | air14-1x16d | 44.23/20.68 | 5.13/3.56ms | 45.45/6.41ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 38 | air26-1x16d | 36.31/14.59 | 7.32/4.70ms | 62.02/8.52ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 39 | air26-1x32d | 28.71/9.59 | 8.77/5.05ms | 170.7/19.25ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 40 | air50-1x16d | 31.19/11.26 | 14.73/8.31ms | 91.65/16.06ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 41 | air50-1x32d | 25.59/7.89 | 15.39/7.64ms | 229.6/22.81ms | [pytorch-cls](https://github.com/soeaver/pytorch-classification) 42 | dpn68 | 22.56/6.24 | 22.70/21.41ms | 371.1/329.3ms | [DPNs](https://github.com/cypw/DPNs) 43 | se-resnet50 | 22.39/6.37 | 17.91/19.49ms | 932.2/821.4ms | [senet](https://github.com/hujie-frank/SENet) 44 | se-resnet50-hik | 21.98/5.80 | 17.43/20.13ms | 581.1/482.7ms | [senet-caffe](https://github.com/shicai/SENet-Caffe) 45 | se-inception-v2 | 23.64/7.04 | 15.31/11.21ms | 251.9/218.5ms | 
[senet](https://github.com/hujie-frank/SENet) 46 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/2007test400.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | 000002 3 | 000003 4 | 000004 5 | 000006 6 | 000008 7 | 000010 8 | 000011 9 | 000013 10 | 000014 11 | 000015 12 | 000018 13 | 000022 14 | 000025 15 | 000027 16 | 000028 17 | 000029 18 | 000031 19 | 000037 20 | 000038 21 | 000040 22 | 000043 23 | 000045 24 | 000049 25 | 000053 26 | 000054 27 | 000055 28 | 000056 29 | 000057 30 | 000058 31 | 000059 32 | 000062 33 | 000067 34 | 000068 35 | 000069 36 | 000070 37 | 000071 38 | 000074 39 | 000075 40 | 000076 41 | 000079 42 | 000080 43 | 000082 44 | 000084 45 | 000085 46 | 000086 47 | 000087 48 | 000088 49 | 000090 50 | 000092 51 | 000094 52 | 000096 53 | 000097 54 | 000098 55 | 000100 56 | 000103 57 | 000105 58 | 000106 59 | 000108 60 | 000111 61 | 000114 62 | 000115 63 | 000116 64 | 000119 65 | 000124 66 | 000126 67 | 000127 68 | 000128 69 | 000135 70 | 000136 71 | 000137 72 | 000139 73 | 000144 74 | 000145 75 | 000148 76 | 000149 77 | 000151 78 | 000152 79 | 000155 80 | 000157 81 | 000160 82 | 000166 83 | 000167 84 | 000168 85 | 000172 86 | 000175 87 | 000176 88 | 000178 89 | 000179 90 | 000181 91 | 000182 92 | 000183 93 | 000185 94 | 000186 95 | 000188 96 | 000191 97 | 000195 98 | 000196 99 | 000197 100 | 000199 101 | 000201 102 | 000202 103 | 000204 104 | 000205 105 | 000206 106 | 000212 107 | 000213 108 | 000216 109 | 000217 110 | 000223 111 | 000226 112 | 000227 113 | 000230 114 | 000231 115 | 000234 116 | 000237 117 | 000238 118 | 000239 119 | 000240 120 | 000243 121 | 000247 122 | 000248 123 | 000252 124 | 000253 125 | 000254 126 | 000255 127 | 000258 128 | 000260 129 | 000261 130 | 000264 131 | 000265 132 | 000267 133 | 000271 134 | 000272 135 | 000273 136 | 000274 137 | 000277 138 | 000279 139 | 000280 140 | 000281 141 | 000283 142 | 000284 143 | 000286 144 | 000287 145 | 000290 146 | 000291 147 | 000292 148 | 000293 149 | 000295 150 | 000297 151 | 000299 152 | 000300 153 | 000301 154 | 000309 155 | 000310 156 | 000313 157 | 000314 158 | 000315 159 | 000316 160 | 000319 161 | 000324 162 | 000326 163 | 000327 164 | 000330 165 | 000333 166 | 000335 167 | 000339 168 | 000341 169 | 000342 170 | 000345 171 | 000346 172 | 000348 173 | 000350 174 | 000351 175 | 000353 176 | 000356 177 | 000357 178 | 000358 179 | 000360 180 | 000361 181 | 000362 182 | 000364 183 | 000365 184 | 000366 185 | 000368 186 | 000369 187 | 000371 188 | 000375 189 | 000376 190 | 000377 191 | 000378 192 | 000383 193 | 000384 194 | 000385 195 | 000386 196 | 000388 197 | 000389 198 | 000390 199 | 000392 200 | 000393 201 | 000397 202 | 000398 203 | 000399 204 | 000401 205 | 000402 206 | 000405 207 | 000409 208 | 000410 209 | 000412 210 | 000413 211 | 000414 212 | 000415 213 | 000418 214 | 000421 215 | 000422 216 | 000423 217 | 000425 218 | 000426 219 | 000429 220 | 000432 221 | 000434 222 | 000436 223 | 000437 224 | 000440 225 | 000441 226 | 000442 227 | 000444 228 | 000445 229 | 000447 230 | 000449 231 | 000451 232 | 000452 233 | 000453 234 | 000455 235 | 000456 236 | 000457 237 | 000458 238 | 000465 239 | 000466 240 | 000467 241 | 000471 242 | 000472 243 | 000473 244 | 000475 245 | 000478 246 | 000479 247 | 000481 248 | 000485 249 | 000487 250 | 000488 251 | 000490 252 | 000493 253 | 000495 254 | 000497 255 | 000502 256 | 000504 257 | 000505 258 | 000506 259 | 000507 260 | 000510 261 | 000511 262 
| 000512 263 | 000517 264 | 000521 265 | 000527 266 | 000529 267 | 000532 268 | 000533 269 | 000534 270 | 000536 271 | 000538 272 | 000539 273 | 000542 274 | 000546 275 | 000547 276 | 000548 277 | 000551 278 | 000553 279 | 000556 280 | 000557 281 | 000558 282 | 000560 283 | 000561 284 | 000562 285 | 000566 286 | 000567 287 | 000568 288 | 000569 289 | 000570 290 | 000571 291 | 000572 292 | 000573 293 | 000574 294 | 000575 295 | 000576 296 | 000578 297 | 000580 298 | 000584 299 | 000585 300 | 000586 301 | 000587 302 | 000593 303 | 000594 304 | 000595 305 | 000596 306 | 000600 307 | 000602 308 | 000603 309 | 000604 310 | 000606 311 | 000607 312 | 000611 313 | 000614 314 | 000615 315 | 000616 316 | 000617 317 | 000618 318 | 000621 319 | 000623 320 | 000624 321 | 000627 322 | 000629 323 | 000630 324 | 000631 325 | 000634 326 | 000636 327 | 000638 328 | 000639 329 | 000640 330 | 000641 331 | 000642 332 | 000643 333 | 000644 334 | 000646 335 | 000649 336 | 000650 337 | 000651 338 | 000652 339 | 000655 340 | 000658 341 | 000659 342 | 000662 343 | 000664 344 | 000665 345 | 000666 346 | 000668 347 | 000669 348 | 000670 349 | 000673 350 | 000674 351 | 000678 352 | 000679 353 | 000681 354 | 000683 355 | 000687 356 | 000691 357 | 000692 358 | 000693 359 | 000696 360 | 000697 361 | 000698 362 | 000701 363 | 000703 364 | 000704 365 | 000706 366 | 000708 367 | 000715 368 | 000716 369 | 000718 370 | 000719 371 | 000721 372 | 000722 373 | 000723 374 | 000724 375 | 000725 376 | 000727 377 | 000732 378 | 000734 379 | 000735 380 | 000736 381 | 000737 382 | 000741 383 | 000743 384 | 000744 385 | 000745 386 | 000747 387 | 000749 388 | 000751 389 | 000757 390 | 000758 391 | 000759 392 | 000762 393 | 000765 394 | 000766 395 | 000769 396 | 000773 397 | 000775 398 | 000778 399 | 000779 400 | 000781 -------------------------------------------------------------------------------- /det/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Object Detection 3 | 4 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 5 | we are releasing the training code and files, the models and more experiments will come soon.** 6 | 7 | ### Object Detection Performance on PASCAL VOC. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/VOC_Benchmark.md)) 8 | 9 | #### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 10 | 11 | Network|mAP@50(%)|training
speed|training
memory|testing
speed|testing
memory 12 | :---:|:---:|:---:|:---:|:---:|:---: 13 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 14 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 15 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 16 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 17 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 19 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 20 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB 21 | 22 | - To reduce memory usage, we merge each model's batchnorm layer parameters into the following scale layer (see the sketch after this list); for more details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 23 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file, so that more testing tricks can be adopted; 24 | - Performance, speed and memory are measured on [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) with an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions; 25 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3; 26 | 27 | 
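A minimal numpy sketch of that batchnorm-to-scale merge (an illustration only, assuming a net in which every BatchNorm layer is immediately followed by a Scale layer, as in the ResNet-style prototxts of this repository; file and layer names below are placeholders):

```Python
import numpy as np
import caffe

# Placeholder file names -- substitute your own deploy/weights.
net = caffe.Net('deploy_with_bn.prototxt', 'weights_with_bn.caffemodel', caffe.TEST)

eps = 1e-5
bn_scale_pairs = [('bn2a_branch1', 'scale2a_branch1')]  # illustrative pair
for bn_name, scale_name in bn_scale_pairs:
    # Caffe's BatchNorm stores running mean, running variance and a moving
    # average factor; the true statistics are blobs[0..1] divided by blobs[2].
    mean, var, factor = [p.data for p in net.params[bn_name]]
    gamma, beta = [p.data for p in net.params[scale_name]]
    scale = 1.0 / factor[0] if factor[0] != 0 else 0.0
    std = np.sqrt(var * scale + eps)
    # Fold the BN statistics into the Scale layer's gamma/beta; the BN layer
    # can then be removed from the merged deploy prototxt.
    net.params[scale_name][0].data[...] = gamma / std
    net.params[scale_name][1].data[...] = beta - gamma * mean * scale / std
net.save('weights_merged.caffemodel')
```

28 | #### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 29 | 30 | Network|mAP@50(%)|training<br/>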
speed|training
memory|testing
speed|testing
memory 31 | :---:|:---:|:---:|:---:|:---:|:---: 32 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB 33 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB 34 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB 35 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB 36 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB 37 | 38 | - 2fc means a detection head of conv256d --- fc1024d --- fc1024d; 39 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 40 | 41 | 42 | #### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 43 | 44 | Network|mAP@50(%)|training<br/>
speed|training
memory|testing
speed|testing
memory 45 | :---:|:---:|:---:|:---:|:---:|:---: 46 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB 47 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB 48 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB 49 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB 50 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB 51 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB 52 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB 53 | 54 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 55 | 56 | 57 | ### Object Detection Performance on MSCOCO. ([More experiments](https://github.com/soeaver/caffe-model/blob/master/det/MSCOCO_Benchmark.md)) 58 | 59 | #### **1. Results training on MSCOCO2017-trainval and testing on test-dev2017.** 60 | 61 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 62 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 63 | **RFCN-se-inception-v2**<br/>
with ms-train & ohem & multigrid | 32.6 | 53.6 | 34.5 | 12.5 | 35.1 | 48.4 64 | **RFCN-se-resnet50**
with ms-train & ohem & multigrid | 32.9 | 54.4 | 34.8 | 13.0 | 35.3 | 48.1 65 | **RFCN-air101**
with ms-train & ohem & multigrid | 38.2 | 60.1 | 41.2 | 18.2 | 41.9 | 53.0 66 | **Faster-2fc-air101**
with ms-train & ohem & multigrid | 36.5 | 60.4 | 38.1 | 15.5 | 39.5 | 53.5 67 | 68 | - All the models are tested on a single scale (600*1000) without any bells and whistles; 69 | 70 | 71 | #### **2. Context Pyramid Attention Network (CPANet) results training on MSCOCO2017-trainval and testing on test-dev2017.** 72 | 73 | Network|mAP|mAP@50|mAP@75|mAP@S|mAP@M|mAP@L 74 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 75 | **CPANet-air101**<br/>
with ms-train & ohem & multigrid & 800-scale-test | 41.9 | 64.8 | 45.5 | 24.0 | 45.9 | 54.6 76 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms | 42.7 | 65.4 | 46.7 | 24.6 | 46.8 | 55.6 77 | **CPANet-air101**
with ms-train & ohem & multigrid & 800-scale-test & snms & flipping | 43.5 | 65.9 | 47.5 | 25.1 | 47.7 | 56.6 78 | -------------------------------------------------------------------------------- /det/VOC_Benchmark.md: -------------------------------------------------------------------------------- 1 | ## VOC Detection Benchmark 2 | 3 | **We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv)** 4 | 5 | ### **1. Original Faster-RCNN training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 6 | 7 | Network|mAP@50(%)|training
speed|training
memory|testing
speed|testing
memory 8 | :---:|:---:|:---:|:---:|:---:|:---: 9 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 10 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 11 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB 12 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 13 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 14 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 15 | resnext101-64x4d| 80.71 | 2.0 img/s
(batch=96) | 11,277MB | 3.7 img/s | 9,461MB 16 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB 17 | xception| 76.6 | 3.3 img/s | 7,341MB | 7.8 img/s | 2,979MB 18 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 19 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 20 | densenet-201| 77.53 | 3.9 img/s
(batch=72) | 10,073MB | 5.5 img/s | 9,955MB 21 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB 22 | air101| 81.0 | 2.4 img/s | 7,747MB | 5.1 img/s | 5,777MB 23 | 24 | - To reduce memory usage, we merge each model's batchnorm layer parameters into the following scale layer; for more details please refer to [faster-rcnn-resnet](https://github.com/Eniac-Xie/faster-rcnn-resnet#modification) or [pva-faster-rcnn](https://github.com/sanghoon/pva-faster-rcnn/blob/master/tools/gen_merged_model.py); 25 | - We also split each deploy file into an RPN deploy file and an RCNN deploy file, so that more testing tricks can be adopted; 26 | - Performance, speed and memory are measured on [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) with an NVIDIA Titan (Pascal); we do not guarantee that the results can be reproduced under any other conditions; 27 | - All the models are trained on a single scale (600*1000) with image flipping and train-batch=128 for 80,000 iterations, and tested on the same single scale with test-batch=300 and nms=0.3; 28 | 29 | 30 | **Comparisons on VOC 2007 test using Faster-RCNN with inception-v4.** 31 | 32 | Method|mAP@50| improvement |test speed 33 | :---|:---:|:---:|:---: 34 | baseline inception-v4 | 81.49 | -- | 5.4 img/s 35 |  +multi-scale training | 83.79 | 2.30 | 5.4 img/s 36 |  +box voting | 83.95 | 0.16 | 5.4 img/s 37 |  +nms=0.4 | 84.22 | 0.27 | 5.4 img/s 38 |  +image flipping test | 84.54 | 0.32 | 2.7 img/s 39 |  +multi-scale testing | 85.78 | 1.24 | 0.13 img/s 40 | 41 | - The SCALES for multi-scale training are (200, 400, 600, 800, 1000) and MAX_SIZE is 1666 (see the cfg sketch after this list); 42 | - For multi-scale training, we double the training iterations (160,000 for VOC 0712 trainval); 43 | - The SCALES for multi-scale testing are (400, 600, 800, 1000, 1200) and MAX_SIZE is 2000; 44 | 
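A minimal sketch of the corresponding config overrides, assuming a py-faster-rcnn-style `fast_rcnn/config.py` (as used by py-RFCN-priv); the same keys can equally be set in the experiment's .yml file:

```Python
from fast_rcnn.config import cfg_from_list

# Values mirror the bullets above; TRAIN.SCALES / TEST.SCALES and MAX_SIZE
# are the standard cfg keys of py-faster-rcnn-style code bases.
cfg_from_list(['TRAIN.SCALES', '(200, 400, 600, 800, 1000)',
               'TRAIN.MAX_SIZE', '1666',
               'TEST.SCALES', '(400, 600, 800, 1000, 1200)',
               'TEST.MAX_SIZE', '2000'])
```

45 | ### **2. Faster-RCNN-2fc-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 46 | 47 | Network|mAP@50(%)|training<br/>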
speed|training
memory|testing
speed|testing
memory 48 | :---:|:---:|:---:|:---:|:---:|:---: 49 | se-inception-v2 | (77.57) | 9.4 img/s | 2,453MB | 15.9 img/s | 1,573MB 50 | se-resnet50 | (79.73) | 6.2 img/s | 4,129MB | 12.8 img/s | 2,175MB 51 | resnet101-v2 w/o OHEM | 80.18 | 5.4 img/s | 5,807MB | 10.5 img/s | 3,147MB 52 | resnet101-v2 | 80.6(80.49) | 5.0 img/s | 5,833MB | 10.5 img/s | 3,147MB 53 | air101 | (81.47) | 3.4 img/s | 6,653MB | 8.7 img/s | 4,503MB 54 | air101-context | (82.09) | 3.3 img/s | 6,773MB | 8.6 img/s | 4,577MB 55 | air101-fpn w/o OHEM | 81.44 | 2.4 img/s | 7,063MB | 3.8 img/s | 4,433MB 56 | inception-v4-3x3 | 81.12(81.30) | 3.73 img/s | 5,383MB | 10.1 img/s | 3,217MB 57 | 58 | - 2fc means a detection head of conv256d --- fc1024d --- fc1024d; 59 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 60 | 61 | 62 | ### **3. RFCN-OHEM training on VOC 2007+2012 trainval and testing on VOC 2007 test.** 63 | 64 | Network|mAP@50(%)|training<br/>
speed|training
memory|testing
speed|testing
memory 65 | :---:|:---:|:---:|:---:|:---:|:---: 66 | resnet18 | 71.82 | 14.3 img/s | 1,215MB | 23.4 img/s | 899MB 67 | resnext26-32x4d| 72.07 | 7.5 img/s | 2,521MB | 15.0 img/s | 1,797MB 68 | se-inception-v2| (78.23) | 10.2 img/s | 2,303MB | 14.0 img/s | 1,567MB 69 | se-resnet50 | (79.19) | 6.3 img/s | 3,999MB | 11.7 img/s | 2,205MB 70 | resnet101-v2| 78.93(79.9) | 4.9 img/s | 5,719MB | 10.4 img/s | 3,097MB 71 | resnext101-32x4d| 79.98(80.35) | 3.8 img/s | 6,977MB | 8.8 img/s | 4,761MB 72 | resnext101-64x4d| 80.26(79.88) | 2.4 img/s | 10,203MB | 6.2 img/s | 8,529MB 73 | air101| 79.42(80.93) | 3.4 img/s | 6,525MB | 8.5 img/s | 4,477MB 74 | air152| (81.18) | 2.6 img/s | 9,331MB | 6.7 img/s | 6,151MB 75 | inception-v4| 80.2 | 4.1 img/s | 4,371MB | 10.3 img/s | 2,343MB 76 | inception-v4-3x3 | 81.15 | 3.7 img/s | 5,207MB | 9.5 img/s | 3,151MB 77 | 78 | - The mAP@50 score in parentheses comes from training with OHEM and [multigrid](https://arxiv.org/abs/1706.05587); 79 | 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Caffe-model 2 | Caffe models (including classification, detection and segmentation) and deploy prototxt for resnet, resnext, inception_v3, inception_v4, inception_resnet, wider_resnet, densenet, aligned-inception-resne(x)t, DPNs and other networks. 3 | 4 | Clone the caffe-model repository: 5 | ```Shell 6 | git clone https://github.com/soeaver/caffe-model --recursive 7 | ``` 8 | 9 | ## We recommend using these caffe models with [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) 10 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 11 | 12 | ## Disclaimer 13 | 14 | Most of the pre-trained models are converted from other projects; the main credit belongs to the original authors. 15 | 16 | Project links: 17 | 18 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [Senet](https://github.com/hujie-frank/SENet) 19 | 20 | 21 | ## CLS (Classification, more details are in [cls](https://github.com/soeaver/caffe-model/tree/master/cls)) 22 | ### Performance on imagenet validation. 23 | **Top-1/5 error of pre-trained models in this repository (download [urls](https://github.com/soeaver/caffe-model/tree/master/cls#performance-on-imagenet-validation)).** 24 | 25 | Network|224/299<br/>
(single-crop)|224/299
(12-crop)|320/395
(single-crop)|320/395
(12-crop) 26 | :---:|:---:|:---:|:---:|:---: 27 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57 28 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32 29 | resnet269-v2| 19.71/5.00 | **18.25**/4.20 | 18.70/4.33 | **17.87**/3.85 30 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24 31 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39 32 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 |18.12/3.92 33 | inception-resnet-v2| 19.86/**4.83** | 18.46/**4.08** | 18.75/**4.02** | 18.15/**3.71** 34 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03 35 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59 36 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31 37 | wrn50-2
(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87 38 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43 39 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24 40 | dpn-107| 19.70/5.06 | ../.. | 18.41/4.25 | ../.. 41 | 42 | 43 | ## DET (Detection, more details are in [det](https://github.com/soeaver/caffe-model/tree/master/det)) 44 | ### Object Detection Performance on PASCAL VOC. 45 | **Original Faster-RCNN trained on VOC 2007+2012 trainval and tested on VOC 2007 test.** 46 | 47 | Network|mAP@50|train speed|train memory|test speed|test memory 48 | :---:|:---:|:---:|:---:|:---:|:---: 49 | resnet18 | 70.02 | 9.5 img/s | 1,235MB | 17.5 img/s | 989MB 50 | resnet101-v2| 79.6 | 3.1 img/s | 6,495MB | 7.1 img/s | 4,573MB 51 | resnet152-v2| 80.72 | 2.8 img/s | 9,315MB | 6.2 img/s | 6,021MB 52 | wrn50-2| 78.59 | 2.1 img/s | 4,895MB | 4.9 img/s | 3,499MB 53 | resnext50-32x4d| 77.99 | 3.6 img/s | 5,315MB | 7.4 img/s | 4,305MB 54 | resnext101-32x4d| 79.98 | 2.7 img/s | 7,836MB | 6.3 img/s | 5,705MB 55 | resnext101-64x4d| 80.71 | 2.0 img/s<br/>
(batch=96) | 11,277MB | 3.7 img/s | 9,461MB 56 | inception-v3| 78.6 | 4.1 img/s | 4,325MB | 7.3 img/s | 3,445MB 57 | inception-v4| 81.49 | 2.6 img/s | 6,759MB | 5.4 img/s | 4,683MB 58 | inception-resnet-v2| 80.0 | 2.0 img/s
(batch=112) | 11,497MB | 3.2 img/s | 8,409MB 59 | densenet-201| 77.53 | 3.9 img/s
(batch=72) | 10,073MB | 5.5 img/s | 9,955MB 60 | resnet38a| 80.1 | 1.4 img/s | 8,723MB | 3.4 img/s | 5,501MB 61 | 62 | 63 | ## SEG (Segmentation, more details are in [seg](https://github.com/soeaver/caffe-model/tree/master/seg)) 64 | ### Object Segmentation Performance on PASCAL VOC. 65 | **PSPNet training on [SBD](http://home.bharathh.info/pubs/pdfs/BharathICCV2011.pdf) (10,582 images) and testing on VOC 2012 validation (1,449 images).** 66 | 67 | Network|mIoU(%)|pixel acc(%)|training
speed|training
memory|testing
speed|testing
memory 68 | :---:|:---:|:---:|:---:|:---:|:---:|:---: 69 | resnet101-v2| 77.94 | 94.94 | 1.6 img/s | 8,023MB | 3.0 img/s | 4,071MB 70 | resnet101-v2-selu| 77.10 | 94.80 | 1.6 img/s | 8,017MB | 3.0 img/s | 4,065MB 71 | resnext101-32x4d| 77.79 | 94.92 | 1.3 img/s | 8,891MB | 2.6 img/s | 5,241MB 72 | air101| 77.64 | 94.93 | 1.3 img/s | 10,017MB | 2.5 img/s | 5,241MB 73 | inception-v4| 77.58 | 94.83 | -- img/s | --MB | -- img/s | --MB 74 | 75 | 76 | ## License 77 | 78 | caffe-model is released under the MIT License (refer to the LICENSE file for details). 79 | 80 | 81 | ## Acknowledgement 82 | 83 | I greatly thank [Yangqing Jia](https://github.com/Yangqing) and [BVLC group](https://www.github.com/BVLC/caffe) for developing Caffe. 84 | 85 | And I would like to thank all the authors of every network. 86 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet18/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet18-priv-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res4b" 2 | input_shape { 3 | dim: 1 4 | dim: 256 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res4b" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | bottom: "roi_pool" 30 | top: "res5a_branch1" 31 | name: "res5a_branch1" 32 | type: "Convolution" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | num_output: 512 39 | kernel_size: 1 40 | pad: 0 41 | stride: 2 42 | bias_term: false 43 | } 44 | } 45 | layer { 46 | bottom: "res5a_branch1" 47 | top: "res5a_branch1" 48 | name: "scale5a_branch1" 49 | type: "Scale" 50 | scale_param { 51 | bias_term: true 52 | } 53 | param { 54 | lr_mult: 0.0 55 | decay_mult: 0.0 56 | } 57 | param { 58 | lr_mult: 0.0 59 | decay_mult: 0.0 60 | } 61 | } 62 | layer { 63 | bottom: "roi_pool" 64 | top: "res5a_branch2a" 65 | name: "res5a_branch2a" 66 | type: "Convolution" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | convolution_param { 72 | num_output: 512 73 | kernel_size: 3 74 | pad: 1 75 | stride: 2 76 | bias_term: false 77 | } 78 | } 79 | layer { 80 | bottom: "res5a_branch2a" 81 | top: "res5a_branch2a" 82 | name: "scale5a_branch2a" 83 | type: "Scale" 84 | scale_param { 85 | bias_term: true 86 | } 87 | param { 88 | lr_mult: 0.0 89 | decay_mult: 0.0 90 | } 91 | param { 92 | lr_mult: 0.0 93 | decay_mult: 0.0 94 | } 95 | } 96 | layer { 97 | bottom: "res5a_branch2a" 98 | top: "res5a_branch2a" 99 | name: "res5a_branch2a_relu" 100 | type: "ReLU" 101 | } 102 | layer { 103 | bottom: "res5a_branch2a" 104 | top: "res5a_branch2b" 105 | name: "res5a_branch2b" 106 | type: "Convolution" 107 | param { 108 | lr_mult: 1 109 | decay_mult: 1 110 | } 111 | convolution_param { 112 | num_output: 512 113 | kernel_size: 3 114 | pad: 1 115 | stride: 1 116 | bias_term: false 117 | } 118 | } 119 | layer { 120 | bottom: "res5a_branch2b" 121 | top: "res5a_branch2b" 122 | name: "scale5a_branch2b" 123 | type: "Scale" 124 | scale_param { 125 | bias_term: true 126 | } 127 | param { 128 | lr_mult: 0.0 129 | decay_mult: 0.0 130 | } 131 | param { 132 | lr_mult: 0.0 133 | decay_mult: 0.0 134 | } 135 | } 136 | layer { 137 | bottom: "res5a_branch1" 138 | bottom: "res5a_branch2b" 139 | top: 
"res5a" 140 | name: "res5a" 141 | type: "Eltwise" 142 | } 143 | layer { 144 | bottom: "res5a" 145 | top: "res5a" 146 | name: "res5a_relu" 147 | type: "ReLU" 148 | } 149 | layer { 150 | bottom: "res5a" 151 | top: "res5b_branch2a" 152 | name: "res5b_branch2a" 153 | type: "Convolution" 154 | param { 155 | lr_mult: 1 156 | decay_mult: 1 157 | } 158 | convolution_param { 159 | num_output: 512 160 | kernel_size: 3 161 | pad: 1 162 | stride: 1 163 | bias_term: false 164 | } 165 | } 166 | layer { 167 | bottom: "res5b_branch2a" 168 | top: "res5b_branch2a" 169 | name: "scale5b_branch2a" 170 | type: "Scale" 171 | scale_param { 172 | bias_term: true 173 | } 174 | param { 175 | lr_mult: 0.0 176 | decay_mult: 0.0 177 | } 178 | param { 179 | lr_mult: 0.0 180 | decay_mult: 0.0 181 | } 182 | } 183 | layer { 184 | bottom: "res5b_branch2a" 185 | top: "res5b_branch2a" 186 | name: "res5b_branch2a_relu" 187 | type: "ReLU" 188 | } 189 | layer { 190 | bottom: "res5b_branch2a" 191 | top: "res5b_branch2b" 192 | name: "res5b_branch2b" 193 | type: "Convolution" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | convolution_param { 199 | num_output: 512 200 | kernel_size: 3 201 | pad: 1 202 | stride: 1 203 | bias_term: false 204 | } 205 | } 206 | layer { 207 | bottom: "res5b_branch2b" 208 | top: "res5b_branch2b" 209 | name: "scale5b_branch2b" 210 | type: "Scale" 211 | scale_param { 212 | bias_term: true 213 | } 214 | param { 215 | lr_mult: 0.0 216 | decay_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | decay_mult: 0.0 221 | } 222 | } 223 | layer { 224 | bottom: "res5a" 225 | bottom: "res5b_branch2b" 226 | top: "res5b" 227 | name: "res5b" 228 | type: "Eltwise" 229 | } 230 | layer { 231 | bottom: "res5b" 232 | top: "res5b" 233 | name: "res5b_relu" 234 | type: "ReLU" 235 | } 236 | layer { 237 | bottom: "res5b" 238 | top: "pool5" 239 | name: "pool5" 240 | type: "Pooling" 241 | pooling_param { 242 | global_pooling: true 243 | pool: AVE 244 | } 245 | } 246 | layer { 247 | name: "cls_score" 248 | type: "InnerProduct" 249 | bottom: "pool5" 250 | top: "cls_score" 251 | param { 252 | lr_mult: 1 253 | decay_mult: 1 254 | } 255 | param { 256 | lr_mult: 2 257 | decay_mult: 0 258 | } 259 | inner_product_param { 260 | num_output: 21 261 | weight_filler { 262 | type: "msra" 263 | std: 0.01 264 | } 265 | bias_filler { 266 | type: "constant" 267 | value: 0 268 | } 269 | } 270 | } 271 | layer { 272 | name: "bbox_pred" 273 | type: "InnerProduct" 274 | bottom: "pool5" 275 | top: "bbox_pred" 276 | param { 277 | lr_mult: 1 278 | decay_mult: 1 279 | } 280 | param { 281 | lr_mult: 2 282 | decay_mult: 0 283 | } 284 | inner_product_param { 285 | num_output: 84 286 | weight_filler { 287 | type: "msra" 288 | std: 0.01 289 | } 290 | bias_filler { 291 | type: "constant" 292 | value: 0 293 | } 294 | } 295 | } 296 | layer { 297 | name: "cls_prob" 298 | type: "Softmax" 299 | bottom: "cls_score" 300 | top: "cls_prob" 301 | } 302 | 303 | 304 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg13-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: 
"conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "pool3" 161 | type: "Pooling" 162 | bottom: "conv3_2" 163 | top: "pool3" 164 | pooling_param { 165 | pool: MAX 166 | kernel_size: 2 167 | stride: 2 168 | } 169 | } 170 | 171 | layer { 172 | name: "conv4_1" 173 | type: "Convolution" 174 | bottom: "pool3" 175 | top: "conv4_1" 176 | convolution_param { 177 | bias_term: true 178 | num_output: 512 179 | pad: 1 180 | kernel_size: 3 181 | stride: 1 182 | } 183 | } 184 | 185 | layer { 186 | name: "relu4_1" 187 | type: "ReLU" 188 | bottom: "conv4_1" 189 | top: "conv4_1" 190 | } 191 | 192 | layer { 193 | name: "conv4_2" 194 | type: "Convolution" 195 | bottom: "conv4_1" 196 | top: "conv4_2" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_2" 208 | type: "ReLU" 209 | bottom: "conv4_2" 210 | top: "conv4_2" 211 | } 212 | 213 | layer { 214 | name: "pool4" 215 | type: "Pooling" 216 | bottom: "conv4_2" 217 | top: "pool4" 218 | pooling_param { 219 | pool: MAX 220 | kernel_size: 2 221 | stride: 2 222 | } 223 | } 224 | 225 | layer { 226 | name: "conv5_1" 227 | type: "Convolution" 228 | bottom: "pool4" 229 | top: "conv5_1" 230 | convolution_param { 231 | bias_term: true 232 | num_output: 512 233 | pad: 1 234 | kernel_size: 3 235 | stride: 1 236 | } 237 | } 238 | 239 | layer { 240 
| name: "relu5_1" 241 | type: "ReLU" 242 | bottom: "conv5_1" 243 | top: "conv5_1" 244 | } 245 | 246 | layer { 247 | name: "conv5_2" 248 | type: "Convolution" 249 | bottom: "conv5_1" 250 | top: "conv5_2" 251 | convolution_param { 252 | bias_term: true 253 | num_output: 512 254 | pad: 1 255 | kernel_size: 3 256 | stride: 1 257 | } 258 | } 259 | 260 | layer { 261 | name: "relu5_2" 262 | type: "ReLU" 263 | bottom: "conv5_2" 264 | top: "conv5_2" 265 | } 266 | 267 | layer { 268 | name: "pool5" 269 | type: "Pooling" 270 | bottom: "conv5_2" 271 | top: "pool5" 272 | pooling_param { 273 | pool: MAX 274 | kernel_size: 2 275 | stride: 2 276 | } 277 | } 278 | 279 | layer { 280 | bottom: "pool5" 281 | top: "fc6" 282 | name: "fc6" 283 | type: "InnerProduct" 284 | inner_product_param { 285 | num_output: 4096 286 | } 287 | } 288 | 289 | layer { 290 | name: "relu6" 291 | type: "ReLU" 292 | bottom: "fc6" 293 | top: "fc6" 294 | } 295 | 296 | layer { 297 | name: "dropout6" 298 | type: "Dropout" 299 | bottom: "fc6" 300 | top: "fc6" 301 | dropout_param { 302 | dropout_ratio: 0.5 303 | } 304 | } 305 | 306 | layer { 307 | bottom: "fc6" 308 | top: "fc7" 309 | name: "fc7" 310 | type: "InnerProduct" 311 | inner_product_param { 312 | num_output: 4096 313 | } 314 | } 315 | 316 | layer { 317 | name: "relu7" 318 | type: "ReLU" 319 | bottom: "fc7" 320 | top: "fc7" 321 | } 322 | 323 | layer { 324 | name: "dropout7" 325 | type: "Dropout" 326 | bottom: "fc7" 327 | top: "fc7" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | 333 | layer { 334 | bottom: "fc7" 335 | top: "classifier" 336 | name: "classifier" 337 | type: "InnerProduct" 338 | inner_product_param { 339 | num_output: 1000 340 | } 341 | } 342 | 343 | layer { 344 | name: "prob" 345 | type: "Softmax" 346 | bottom: "classifier" 347 | top: "prob" 348 | } 349 | -------------------------------------------------------------------------------- /seg/evaluation_seg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('/home/prmct/workspace/PSPNet-0120/python/') 4 | 5 | import caffe 6 | import cv2 7 | import numpy as np 8 | import datetime 9 | 10 | gpu_mode = True 11 | gpu_id = 3 12 | data_root = '/home/prmct/Database/VOC_PASCAL/VOC2012_test/JPEGImages/' 13 | val_file = 'test_205.txt' 14 | save_root = './predict205_40000_ms/' 15 | model_weights = 'psp_resnext101_32x4d_coco_sbd_iter_40000.caffemodel' 16 | model_deploy = 'deploy_psp_resnext101_32x4d_merge_bn_scale.prototxt' 17 | prob_layer = 'prob' # output layer, normally Softmax 18 | class_num = 21 19 | base_size = 512 20 | crop_size = 473 21 | raw_scale = 57.5 # image scale factor, 1.0 or 128.0 22 | # mean_value = np.array([104.008, 116.669, 122.675]) 23 | # mean_value = np.array([128, 128, 128]) 24 | mean_value = np.array([103.52, 116.28, 123.675]) 25 | scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # multi scale 26 | # scale_array = [1.0] # single scale 27 | flip = True 28 | class_offset = 0 29 | crf = False 30 | crf_deploy = '/home/prmct/Program/segmentation/deploy_crf.prototxt' 31 | crf_factor = 4.0 32 | 33 | if gpu_mode: 34 | caffe.set_mode_gpu() 35 | caffe.set_device(gpu_id) 36 | else: 37 | caffe.set_mode_cpu() 38 | net = caffe.Net(model_deploy, model_weights, caffe.TEST) 39 | 40 | if crf: 41 | net_crf = caffe.Net(crf_deploy, caffe.TEST) 42 | 43 | 44 | def eval_batch(): 45 | eval_images = [] 46 | f = open(val_file, 'r') 47 | for i in f: 48 | eval_images.append(i.strip()) 49 | 50 | skip_num = 0 51 | eval_len = len(eval_images) 52 | 
start_time = datetime.datetime.now() 53 | for i in xrange(eval_len - skip_num): 54 | _img = cv2.imread(data_root + eval_images[i + skip_num] + '.jpg') 55 | h, w, d = _img.shape 56 | 57 | score_map = np.zeros((h, w, class_num), dtype=np.float32) 58 | for j in scale_array: 59 | long_size = float(base_size) * j + 1 60 | ratio = long_size / max(h, w) 61 | new_size = (int(w * ratio), int(h * ratio)) 62 | _scale = cv2.resize(_img, new_size) 63 | score_map += cv2.resize(scale_process(_scale), (w, h)) 64 | score_map /= len(scale_array) 65 | 66 | if crf: 67 | tmp_data = np.asarray([_img.transpose(2, 0, 1)], dtype=np.float32) 68 | tmp_score = np.asarray([score_map.transpose(2, 0, 1)], dtype=np.float32) 69 | net_crf.blobs['data'].reshape(*tmp_data.shape) 70 | net_crf.blobs['data'].data[...] = tmp_data / raw_scale 71 | net_crf.blobs['data_dim'].data[...] = [[[h, w]]] 72 | net_crf.blobs['score'].reshape(*tmp_score.shape) 73 | net_crf.blobs['score'].data[...] = tmp_score * crf_factor 74 | net_crf.forward() 75 | score_map = net_crf.blobs[prob_layer].data[0].transpose(1, 2, 0) 76 | 77 | cv2.imwrite(save_root + eval_images[i + skip_num] + '.png', score_map.argmax(2) + class_offset) 78 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len) + ' ' + str(eval_images[i + skip_num]) 79 | end_time = datetime.datetime.now() 80 | print '\nEvaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time)) 81 | print '\n{} images has been tested. \nThe model is: {}'.format(str(eval_len), model_weights) 82 | 83 | 84 | def scale_process(_scale): 85 | sh, sw, sd = _scale.shape 86 | _scale = np.asarray(_scale, dtype=np.float32) 87 | long_size = max(sh, sw) 88 | short_size = min(sh, sw) 89 | if long_size <= crop_size: 90 | input_data = pad_img(_scale - mean_value) 91 | score = caffe_process(input_data)[:sh, :sw, :] 92 | else: 93 | stride_rate = 2.0 / 3 94 | stride = np.ceil(crop_size * stride_rate) 95 | _pad = _scale 96 | if short_size < crop_size: 97 | _pad = pad_img(_scale - mean_value) + mean_value 98 | 99 | ph, pw, pd = _pad.shape 100 | h_grid = int(np.ceil(float(ph - crop_size) / stride)) + 1 101 | w_grid = int(np.ceil(float(pw - crop_size) / stride)) + 1 102 | data_scale = np.zeros((ph, pw, class_num), dtype=np.float32) 103 | count_scale = np.zeros((ph, pw, class_num), dtype=np.float32) 104 | for grid_yidx in xrange(0, h_grid): 105 | for grid_xidx in xrange(0, w_grid): 106 | s_x = int(grid_xidx * stride) 107 | s_y = int(grid_yidx * stride) 108 | e_x = min(s_x + crop_size, pw) 109 | e_y = min(s_y + crop_size, ph) 110 | s_x = int(e_x - crop_size) 111 | s_y = int(e_y - crop_size) 112 | _sub = _pad[s_y:e_y, s_x:e_x, :] 113 | count_scale[s_y:e_y, s_x:e_x, :] += 1.0 114 | input_data = pad_img(_sub - mean_value) 115 | data_scale[s_y:e_y, s_x:e_x, :] += caffe_process(input_data) 116 | score = data_scale / count_scale 117 | score = score[:sh, :sw, :] 118 | 119 | return score 120 | 121 | 122 | def pad_img(_scale): 123 | sh, sw, sd = _scale.shape 124 | if sh < crop_size: 125 | _pad = np.zeros((crop_size, sw, sd), dtype=np.float32) 126 | _pad[:sh, :, :] = _scale 127 | _scale = _pad 128 | sh, sw, sd = _scale.shape 129 | if sw < crop_size: 130 | _pad = np.zeros((sh, crop_size, sd), dtype=np.float32) 131 | _pad[:, :sw, :] = _scale 132 | _scale = _pad 133 | 134 | return _scale 135 | 136 | 137 | def caffe_process(_input): 138 | h, w, d = _input.shape 139 | _score = np.zeros((h, w, class_num), dtype=np.float32) 140 | if flip: 141 | _flip = _input[:, ::-1] 142 | _flip = _flip.transpose(2, 0, 
1) 143 | _flip = _flip.reshape((1,) + _flip.shape) 144 | net.blobs['data'].reshape(*_flip.shape) 145 | net.blobs['data'].data[...] = _flip / raw_scale 146 | # net.blobs['data_dim'].data[...] = [[[h, w]]] 147 | net.forward() 148 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0)[:, ::-1] 149 | 150 | _input = _input.transpose(2, 0, 1) 151 | _input = _input.reshape((1,) + _input.shape) 152 | net.blobs['data'].reshape(*_input.shape) 153 | net.blobs['data'].data[...] = _input / raw_scale 154 | # net.blobs['data_dim'].data[...] = [[[h, w]]] 155 | net.forward() 156 | _score += net.blobs[prob_layer].data[0].transpose(1, 2, 0) 157 | 158 | return _score / int(flip + 1) 159 | 160 | if __name__ == '__main__': 161 | eval_batch() 162 | 163 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-5x.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | layer { 9 | name: "conv1_1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1_1" 13 | convolution_param { 14 | num_output: 24 15 | pad: 1 16 | kernel_size: 3 17 | } 18 | } 19 | layer { 20 | name: "relu1_1" 21 | type: "ReLU" 22 | bottom: "conv1_1" 23 | top: "conv1_1" 24 | } 25 | layer { 26 | name: "conv1_2" 27 | type: "Convolution" 28 | bottom: "conv1_1" 29 | top: "conv1_2" 30 | convolution_param { 31 | num_output: 22 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_2" 38 | type: "ReLU" 39 | bottom: "conv1_2" 40 | top: "conv1_2" 41 | } 42 | layer { 43 | name: "pool1" 44 | type: "Pooling" 45 | bottom: "conv1_2" 46 | top: "pool1" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 2 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | name: "conv2_1" 55 | type: "Convolution" 56 | bottom: "pool1" 57 | top: "conv2_1" 58 | convolution_param { 59 | num_output: 41 60 | pad: 1 61 | kernel_size: 3 62 | } 63 | } 64 | layer { 65 | name: "relu2_1" 66 | type: "ReLU" 67 | bottom: "conv2_1" 68 | top: "conv2_1" 69 | } 70 | layer { 71 | name: "conv2_2" 72 | type: "Convolution" 73 | bottom: "conv2_1" 74 | top: "conv2_2" 75 | convolution_param { 76 | num_output: 51 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layer { 82 | name: "relu2_2" 83 | type: "ReLU" 84 | bottom: "conv2_2" 85 | top: "conv2_2" 86 | } 87 | layer { 88 | name: "pool2" 89 | type: "Pooling" 90 | bottom: "conv2_2" 91 | top: "pool2" 92 | pooling_param { 93 | pool: MAX 94 | kernel_size: 2 95 | stride: 2 96 | } 97 | } 98 | layer { 99 | name: "conv3_1" 100 | type: "Convolution" 101 | bottom: "pool2" 102 | top: "conv3_1" 103 | convolution_param { 104 | num_output: 108 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layer { 110 | name: "relu3_1" 111 | type: "ReLU" 112 | bottom: "conv3_1" 113 | top: "conv3_1" 114 | } 115 | layer { 116 | name: "conv3_2" 117 | type: "Convolution" 118 | bottom: "conv3_1" 119 | top: "conv3_2" 120 | convolution_param { 121 | num_output: 89 122 | pad: 1 123 | kernel_size: 3 124 | } 125 | } 126 | layer { 127 | name: "relu3_2" 128 | type: "ReLU" 129 | bottom: "conv3_2" 130 | top: "conv3_2" 131 | } 132 | layer { 133 | name: "conv3_3" 134 | type: "Convolution" 135 | bottom: "conv3_2" 136 | top: "conv3_3" 137 | convolution_param { 138 | num_output: 111 139 | pad: 1 140 | kernel_size: 3 141 | } 142 | } 143 | layer { 144 | name: "relu3_3" 145 | type: "ReLU" 146 | bottom: "conv3_3" 147 | top: "conv3_3" 148 | } 149 | layer { 150 | name: "pool3" 151 | type: 
"Pooling" 152 | bottom: "conv3_3" 153 | top: "pool3" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "conv4_1" 162 | type: "Convolution" 163 | bottom: "pool3" 164 | top: "conv4_1" 165 | convolution_param { 166 | num_output: 184 167 | pad: 1 168 | kernel_size: 3 169 | } 170 | } 171 | layer { 172 | name: "relu4_1" 173 | type: "ReLU" 174 | bottom: "conv4_1" 175 | top: "conv4_1" 176 | } 177 | layer { 178 | name: "conv4_2" 179 | type: "Convolution" 180 | bottom: "conv4_1" 181 | top: "conv4_2" 182 | convolution_param { 183 | num_output: 276 184 | pad: 1 185 | kernel_size: 3 186 | } 187 | } 188 | layer { 189 | name: "relu4_2" 190 | type: "ReLU" 191 | bottom: "conv4_2" 192 | top: "conv4_2" 193 | } 194 | layer { 195 | name: "conv4_3" 196 | type: "Convolution" 197 | bottom: "conv4_2" 198 | top: "conv4_3" 199 | convolution_param { 200 | num_output: 228 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layer { 206 | name: "relu4_3" 207 | type: "ReLU" 208 | bottom: "conv4_3" 209 | top: "conv4_3" 210 | } 211 | layer { 212 | name: "pool4" 213 | type: "Pooling" 214 | bottom: "conv4_3" 215 | top: "pool4" 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layer { 223 | name: "conv5_1" 224 | type: "Convolution" 225 | bottom: "pool4" 226 | top: "conv5_1" 227 | convolution_param { 228 | num_output: 512 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layer { 234 | name: "relu5_1" 235 | type: "ReLU" 236 | bottom: "conv5_1" 237 | top: "conv5_1" 238 | } 239 | layer { 240 | name: "conv5_2" 241 | type: "Convolution" 242 | bottom: "conv5_1" 243 | top: "conv5_2" 244 | convolution_param { 245 | num_output: 512 246 | pad: 1 247 | kernel_size: 3 248 | } 249 | } 250 | layer { 251 | name: "relu5_2" 252 | type: "ReLU" 253 | bottom: "conv5_2" 254 | top: "conv5_2" 255 | } 256 | layer { 257 | name: "conv5_3" 258 | type: "Convolution" 259 | bottom: "conv5_2" 260 | top: "conv5_3" 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layer { 268 | name: "relu5_3" 269 | type: "ReLU" 270 | bottom: "conv5_3" 271 | top: "conv5_3" 272 | } 273 | layer { 274 | name: "pool5" 275 | type: "Pooling" 276 | bottom: "conv5_3" 277 | top: "pool5" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 2 281 | stride: 2 282 | } 283 | } 284 | layer { 285 | name: "fc6" 286 | type: "InnerProduct" 287 | bottom: "pool5" 288 | top: "fc6" 289 | inner_product_param { 290 | num_output: 4096 291 | } 292 | } 293 | layer { 294 | name: "relu6" 295 | type: "ReLU" 296 | bottom: "fc6" 297 | top: "fc6" 298 | } 299 | layer { 300 | name: "drop6" 301 | type: "Dropout" 302 | bottom: "fc6" 303 | top: "fc6" 304 | dropout_param { 305 | dropout_ratio: 0.5 306 | } 307 | } 308 | layer { 309 | name: "fc7" 310 | type: "InnerProduct" 311 | bottom: "fc6" 312 | top: "fc7" 313 | inner_product_param { 314 | num_output: 4096 315 | } 316 | } 317 | layer { 318 | name: "relu7" 319 | type: "ReLU" 320 | bottom: "fc7" 321 | top: "fc7" 322 | } 323 | layer { 324 | name: "drop7" 325 | type: "Dropout" 326 | bottom: "fc7" 327 | top: "fc7" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | layer { 333 | name: "fc8" 334 | type: "InnerProduct" 335 | bottom: "fc7" 336 | top: "fc8" 337 | inner_product_param { 338 | num_output: 1000 339 | } 340 | } 341 | layer { 342 | name: "prob" 343 | type: "Softmax" 344 | bottom: "fc8" 345 | top: "prob" 346 | } 347 | 348 | 
-------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-dsd.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | layer { 9 | bottom: "data" 10 | top: "conv1_1" 11 | name: "conv1_1" 12 | type: "Convolution" 13 | convolution_param { 14 | num_output: 64 15 | pad: 1 16 | kernel_size: 3 17 | } 18 | } 19 | layer { 20 | bottom: "conv1_1" 21 | top: "conv1_1" 22 | name: "relu1_1" 23 | type: "ReLU" 24 | } 25 | layer { 26 | bottom: "conv1_1" 27 | top: "conv1_2" 28 | name: "conv1_2" 29 | type: "Convolution" 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | bottom: "conv1_2" 38 | top: "conv1_2" 39 | name: "relu1_2" 40 | type: "ReLU" 41 | } 42 | layer { 43 | bottom: "conv1_2" 44 | top: "pool1" 45 | name: "pool1" 46 | type: "Pooling" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 2 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | bottom: "pool1" 55 | top: "conv2_1" 56 | name: "conv2_1" 57 | type: "Convolution" 58 | convolution_param { 59 | num_output: 128 60 | pad: 1 61 | kernel_size: 3 62 | } 63 | } 64 | layer { 65 | bottom: "conv2_1" 66 | top: "conv2_1" 67 | name: "relu2_1" 68 | type: "ReLU" 69 | } 70 | layer { 71 | bottom: "conv2_1" 72 | top: "conv2_2" 73 | name: "conv2_2" 74 | type: "Convolution" 75 | convolution_param { 76 | num_output: 128 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layer { 82 | bottom: "conv2_2" 83 | top: "conv2_2" 84 | name: "relu2_2" 85 | type: "ReLU" 86 | } 87 | layer { 88 | bottom: "conv2_2" 89 | top: "pool2" 90 | name: "pool2" 91 | type: "Pooling" 92 | pooling_param { 93 | pool: MAX 94 | kernel_size: 2 95 | stride: 2 96 | } 97 | } 98 | layer { 99 | bottom: "pool2" 100 | top: "conv3_1" 101 | name: "conv3_1" 102 | type: "Convolution" 103 | convolution_param { 104 | num_output: 256 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layer { 110 | bottom: "conv3_1" 111 | top: "conv3_1" 112 | name: "relu3_1" 113 | type: "ReLU" 114 | } 115 | layer { 116 | bottom: "conv3_1" 117 | top: "conv3_2" 118 | name: "conv3_2" 119 | type: "Convolution" 120 | convolution_param { 121 | num_output: 256 122 | pad: 1 123 | kernel_size: 3 124 | } 125 | } 126 | layer { 127 | bottom: "conv3_2" 128 | top: "conv3_2" 129 | name: "relu3_2" 130 | type: "ReLU" 131 | } 132 | layer { 133 | bottom: "conv3_2" 134 | top: "conv3_3" 135 | name: "conv3_3" 136 | type: "Convolution" 137 | convolution_param { 138 | num_output: 256 139 | pad: 1 140 | kernel_size: 3 141 | } 142 | } 143 | layer { 144 | bottom: "conv3_3" 145 | top: "conv3_3" 146 | name: "relu3_3" 147 | type: "ReLU" 148 | } 149 | layer { 150 | bottom: "conv3_3" 151 | top: "pool3" 152 | name: "pool3" 153 | type: "Pooling" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | bottom: "pool3" 162 | top: "conv4_1" 163 | name: "conv4_1" 164 | type: "Convolution" 165 | convolution_param { 166 | num_output: 512 167 | pad: 1 168 | kernel_size: 3 169 | } 170 | } 171 | layer { 172 | bottom: "conv4_1" 173 | top: "conv4_1" 174 | name: "relu4_1" 175 | type: "ReLU" 176 | } 177 | layer { 178 | bottom: "conv4_1" 179 | top: "conv4_2" 180 | name: "conv4_2" 181 | type: "Convolution" 182 | convolution_param { 183 | num_output: 512 184 | pad: 1 185 | kernel_size: 3 186 | } 187 | } 188 | layer { 189 | bottom: "conv4_2" 190 | top: "conv4_2" 191 | name: "relu4_2" 192 | type: 
"ReLU" 193 | } 194 | layer { 195 | bottom: "conv4_2" 196 | top: "conv4_3" 197 | name: "conv4_3" 198 | type: "Convolution" 199 | convolution_param { 200 | num_output: 512 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layer { 206 | bottom: "conv4_3" 207 | top: "conv4_3" 208 | name: "relu4_3" 209 | type: "ReLU" 210 | } 211 | layer { 212 | bottom: "conv4_3" 213 | top: "pool4" 214 | name: "pool4" 215 | type: "Pooling" 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layer { 223 | bottom: "pool4" 224 | top: "conv5_1" 225 | name: "conv5_1" 226 | type: "Convolution" 227 | convolution_param { 228 | num_output: 512 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layer { 234 | bottom: "conv5_1" 235 | top: "conv5_1" 236 | name: "relu5_1" 237 | type: "ReLU" 238 | } 239 | layer { 240 | bottom: "conv5_1" 241 | top: "conv5_2" 242 | name: "conv5_2" 243 | type: "Convolution" 244 | convolution_param { 245 | num_output: 512 246 | pad: 1 247 | kernel_size: 3 248 | } 249 | } 250 | layer { 251 | bottom: "conv5_2" 252 | top: "conv5_2" 253 | name: "relu5_2" 254 | type: "ReLU" 255 | } 256 | layer { 257 | bottom: "conv5_2" 258 | top: "conv5_3" 259 | name: "conv5_3" 260 | type: "Convolution" 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layer { 268 | bottom: "conv5_3" 269 | top: "conv5_3" 270 | name: "relu5_3" 271 | type: "ReLU" 272 | } 273 | layer { 274 | bottom: "conv5_3" 275 | top: "pool5" 276 | name: "pool5" 277 | type: "Pooling" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 2 281 | stride: 2 282 | } 283 | } 284 | layer { 285 | bottom: "pool5" 286 | top: "fc6" 287 | name: "fc6" 288 | type: "InnerProduct" 289 | inner_product_param { 290 | num_output: 4096 291 | } 292 | } 293 | layer { 294 | bottom: "fc6" 295 | top: "fc6" 296 | name: "relu6" 297 | type: "ReLU" 298 | } 299 | layer { 300 | bottom: "fc6" 301 | top: "fc6" 302 | name: "drop6" 303 | type: "Dropout" 304 | dropout_param { 305 | dropout_ratio: 0.5 306 | } 307 | } 308 | layer { 309 | bottom: "fc6" 310 | top: "fc7" 311 | name: "fc7" 312 | type: "InnerProduct" 313 | inner_product_param { 314 | num_output: 4096 315 | } 316 | } 317 | layer { 318 | bottom: "fc7" 319 | top: "fc7" 320 | name: "relu7" 321 | type: "ReLU" 322 | } 323 | layer { 324 | bottom: "fc7" 325 | top: "fc7" 326 | name: "drop7" 327 | type: "Dropout" 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | layer { 333 | bottom: "fc7" 334 | top: "fc8" 335 | name: "fc8" 336 | type: "InnerProduct" 337 | inner_product_param { 338 | num_output: 1000 339 | } 340 | } 341 | layer { 342 | name: "prob" 343 | type: "Softmax" 344 | bottom: "fc8" 345 | top: "prob" 346 | } 347 | 348 | -------------------------------------------------------------------------------- /det/rfcn/tools/score.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xml.etree.ElementTree as ET 3 | 4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/' 5 | val_file = '~/2007test.txt' 6 | det_root = '~/predict_ss/' 7 | 8 | _classes = ('__background__', # always index 0 9 | 'aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor') 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in 
tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False): 68 | 69 | eval_images = [] 70 | f = open(val_file, 'r') 71 | for i in f: 72 | eval_images.append(i.strip()) 73 | 74 | recs = {} 75 | for imagename in eval_images: 76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml') 77 | 78 | class_recs = {} 79 | 80 | npos = 0 81 | for imagename in eval_images: 82 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 83 | bbox = np.array([x['bbox'] for x in R]) 84 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 85 | det = [False] * len(R) 86 | npos = npos + sum(~difficult) 87 | class_recs[imagename] = {'bbox': bbox, 88 | 'difficult': difficult, 89 | 'det': det} 90 | detfile = det_root + 'comp4' + '_det' + '_test_' + classname + '.txt' 91 | with open(detfile, 'r') as f: 92 | lines = f.readlines() 93 | splitlines = [x.strip().split(' ') for x in lines] 94 | image_ids = [x[0] for x in splitlines] 95 | confidence = np.array([float(x[1]) for x in splitlines]) 96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 97 | 98 | # sort by confidence 99 | sorted_ind = np.argsort(-confidence) 100 | sorted_scores = np.sort(-confidence) 101 | BB = BB[sorted_ind, :] 102 | image_ids = [image_ids[x] for x in sorted_ind] 103 | 104 | # go down dets and mark TPs and FPs 105 | nd = len(image_ids) 106 | tp = np.zeros(nd) 107 | fp = np.zeros(nd) 108 | for d in range(nd): 109 | R = class_recs[image_ids[d]] 110 | bb = BB[d, :].astype(float) 111 | ovmax = -np.inf 112 | BBGT = R['bbox'].astype(float) 113 | 114 | if BBGT.size > 0: 115 | # compute overlaps 116 | # intersection 117 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 118 | iymin = np.maximum(BBGT[:, 1], bb[1]) 119 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 120 | iymax = np.minimum(BBGT[:, 3], bb[3]) 121 | iw = np.maximum(ixmax - ixmin + 1., 0.) 122 | ih = np.maximum(iymax - iymin + 1., 0.) 123 | inters = iw * ih 124 | 125 | # union 126 | uni = ((bb[2] - bb[0] + 1.) 
* (bb[3] - bb[1] + 1.) + 127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 129 | 130 | overlaps = inters / uni 131 | ovmax = np.max(overlaps) 132 | jmax = np.argmax(overlaps) 133 | 134 | if ovmax > ovthresh: 135 | if not R['difficult'][jmax]: 136 | if not R['det'][jmax]: 137 | tp[d] = 1. 138 | R['det'][jmax] = 1 139 | else: 140 | fp[d] = 1. 141 | else: 142 | fp[d] = 1. 143 | 144 | # compute precision recall 145 | fp = np.cumsum(fp) 146 | tp = np.cumsum(tp) 147 | rec = tp / float(npos) 148 | # avoid divide by zero in case the first detection matches a difficult 149 | # ground truth 150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 151 | ap = voc_ap(rec, prec, use_07_metric) 152 | 153 | return rec, prec, ap 154 | 155 | def compute_ap(): 156 | 157 | aps = [] 158 | for i, cls in enumerate(_classes): 159 | if cls == '__background__': 160 | continue 161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt' 162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True) 163 | 164 | aps += [ap] 165 | print('AP for {} = {:.4f}'.format(cls, ap)) 166 | 167 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 168 | print('~~~~~~~~') 169 | print('Results:') 170 | 171 | for ap in aps: 172 | print('{:.3f}'.format(ap)) 173 | print('{:.3f}'.format(np.mean(aps))) 174 | print('~~~~~~~~') 175 | 176 | if __name__ == '__main__': 177 | compute_ap() 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /det/faster_rcnn/tools/score.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xml.etree.ElementTree as ET 3 | 4 | gt_root = '~/Database/VOC_PASCAL/VOC2007_test/Annotations/' 5 | val_file = '~/2007test.txt' 6 | det_root = '~/predict_ss/' 7 | 8 | _classes = ('__background__', # always index 0 9 | 'aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor') 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | def voc_ap(rec, prec, use_07_metric=False): 35 | """ ap = voc_ap(rec, prec, [use_07_metric]) 36 | Compute VOC AP given precision and recall. 37 | If use_07_metric is true, uses the 38 | VOC 07 11 point method (default:False). 39 | """ 40 | if use_07_metric: 41 | # 11 point metric 42 | ap = 0. 43 | for t in np.arange(0., 1.1, 0.1): 44 | if np.sum(rec >= t) == 0: 45 | p = 0 46 | else: 47 | p = np.max(prec[rec >= t]) 48 | ap = ap + p / 11. 
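    # Editorial note: the 11-point branch above samples interpolated precision at
    # recall thresholds {0.0, 0.1, ..., 1.0} and averages the 11 values; the
    # branch below instead integrates the precision envelope exactly. Toy example:
    # for rec = [0.5, 1.0] and prec = [1.0, 0.5], the 11-point AP is
    # (6 * 1.0 + 5 * 0.5) / 11 = 8.5 / 11 ~ 0.77, while the integrated AP is
    # 0.5 * 1.0 + 0.5 * 0.5 = 0.75.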
49 | else: 50 | # correct AP calculation 51 | # first append sentinel values at the end 52 | mrec = np.concatenate(([0.], rec, [1.])) 53 | mpre = np.concatenate(([0.], prec, [0.])) 54 | 55 | # compute the precision envelope 56 | for i in range(mpre.size - 1, 0, -1): 57 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 58 | 59 | # to calculate area under PR curve, look for points 60 | # where X axis (recall) changes value 61 | i = np.where(mrec[1:] != mrec[:-1])[0] 62 | 63 | # and sum (\Delta recall) * prec 64 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 65 | return ap 66 | 67 | def voc_eval(filename, classname, ovthresh=0.5, use_07_metric=False): 68 | 69 | eval_images = [] 70 | f = open(val_file, 'r') 71 | for i in f: 72 | eval_images.append(i.strip()) 73 | 74 | recs = {} 75 | for imagename in eval_images: 76 | recs[imagename] = parse_rec(gt_root + imagename + '.xml') 77 | 78 | class_recs = {} 79 | 80 | npos = 0 81 | for imagename in eval_images: 82 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 83 | bbox = np.array([x['bbox'] for x in R]) 84 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 85 | det = [False] * len(R) 86 | npos = npos + sum(~difficult) 87 | class_recs[imagename] = {'bbox': bbox, 88 | 'difficult': difficult, 89 | 'det': det} 90 | detfile = det_root + 'comp4' + '_det' + '_test_' + classname + '.txt' 91 | with open(detfile, 'r') as f: 92 | lines = f.readlines() 93 | splitlines = [x.strip().split(' ') for x in lines] 94 | image_ids = [x[0] for x in splitlines] 95 | confidence = np.array([float(x[1]) for x in splitlines]) 96 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 97 | 98 | # sort by confidence 99 | sorted_ind = np.argsort(-confidence) 100 | sorted_scores = np.sort(-confidence) 101 | BB = BB[sorted_ind, :] 102 | image_ids = [image_ids[x] for x in sorted_ind] 103 | 104 | # go down dets and mark TPs and FPs 105 | nd = len(image_ids) 106 | tp = np.zeros(nd) 107 | fp = np.zeros(nd) 108 | for d in range(nd): 109 | R = class_recs[image_ids[d]] 110 | bb = BB[d, :].astype(float) 111 | ovmax = -np.inf 112 | BBGT = R['bbox'].astype(float) 113 | 114 | if BBGT.size > 0: 115 | # compute overlaps 116 | # intersection 117 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 118 | iymin = np.maximum(BBGT[:, 1], bb[1]) 119 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 120 | iymax = np.minimum(BBGT[:, 3], bb[3]) 121 | iw = np.maximum(ixmax - ixmin + 1., 0.) 122 | ih = np.maximum(iymax - iymin + 1., 0.) 123 | inters = iw * ih 124 | 125 | # union 126 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 127 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 128 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 129 | 130 | overlaps = inters / uni 131 | ovmax = np.max(overlaps) 132 | jmax = np.argmax(overlaps) 133 | 134 | if ovmax > ovthresh: 135 | if not R['difficult'][jmax]: 136 | if not R['det'][jmax]: 137 | tp[d] = 1. 138 | R['det'][jmax] = 1 139 | else: 140 | fp[d] = 1. 141 | else: 142 | fp[d] = 1. 
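    # Editorial note: detections were sorted by descending confidence above, and
    # each ground-truth box may be matched at most once (the R['det'][jmax]
    # flag), so a duplicate detection of an already-matched object counts as a
    # false positive; detections matched to a "difficult" ground truth count as
    # neither true nor false positive.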
143 | 144 | # compute precision recall 145 | fp = np.cumsum(fp) 146 | tp = np.cumsum(tp) 147 | rec = tp / float(npos) 148 | # avoid divide by zero in case the first detection matches a difficult 149 | # ground truth 150 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 151 | ap = voc_ap(rec, prec, use_07_metric) 152 | 153 | return rec, prec, ap 154 | 155 | def compute_ap(): 156 | 157 | aps = [] 158 | for i, cls in enumerate(_classes): 159 | if cls == '__background__': 160 | continue 161 | filename = det_root + 'comp4' + '_det' + '_test_' + cls + '.txt' 162 | rec, prec, ap = voc_eval(filename, cls, ovthresh=0.5, use_07_metric=True) 163 | 164 | aps += [ap] 165 | print('AP for {} = {:.4f}'.format(cls, ap)) 166 | 167 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 168 | print('~~~~~~~~') 169 | print('Results:') 170 | 171 | for ap in aps: 172 | print('{:.3f}'.format(ap)) 173 | print('{:.3f}'.format(np.mean(aps))) 174 | print('~~~~~~~~') 175 | 176 | if __name__ == '__main__': 177 | compute_ap() 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /cls/evaluation_cls.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('~/caffe-master-0116/python') 4 | 5 | import numpy as np 6 | import caffe 7 | import cv2 8 | import datetime 9 | 10 | gpu_mode = True 11 | gpu_id = 0 12 | data_root = '~/Database/ILSVRC2012' 13 | val_file = 'ILSVRC2012_val.txt' 14 | save_log = 'log{}.txt'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S')) 15 | model_weights = 'resnet-v2/resnet101_v2.caffemodel' 16 | model_deploy = 'resnet-v2/deploy_resnet101_v2.prototxt' 17 | prob_layer = 'prob' 18 | class_num = 1000 19 | base_size = 256 # short size 20 | crop_size = 224 21 | # mean_value = np.array([128.0, 128.0, 128.0]) # BGR 22 | mean_value = np.array([102.9801, 115.9465, 122.7717]) # BGR 23 | # std = np.array([128.0, 128.0, 128.0]) # BGR 24 | std = np.array([1.0, 1.0, 1.0]) # BGR 25 | crop_num = 1 # 1 and others for center(single)-crop, 12 for mirror(12)-crop, 144 for multi(144)-crop 26 | batch_size = 1 27 | top_k = (1, 5) 28 | 29 | if gpu_mode: 30 | caffe.set_mode_gpu() 31 | caffe.set_device(gpu_id) 32 | else: 33 | caffe.set_mode_cpu() 34 | net = caffe.Net(model_deploy, model_weights, caffe.TEST) 35 | 36 | 37 | def eval_batch(): 38 | eval_images = [] 39 | ground_truth = [] 40 | f = open(val_file, 'r') 41 | for i in f: 42 | eval_images.append(i.strip().split(' ')[0]) 43 | ground_truth.append(int(i.strip().split(' ')[1])) 44 | f.close() 45 | 46 | skip_num = 0 47 | eval_len = len(eval_images) 48 | accuracy = np.zeros(len(top_k)) 49 | # eval_len = 100 50 | start_time = datetime.datetime.now() 51 | for i in xrange(eval_len - skip_num): 52 | _img = cv2.imread(data_root + eval_images[i + skip_num]) 53 | _img = cv2.resize(_img, (int(_img.shape[1] * base_size / min(_img.shape[:2])), 54 | int(_img.shape[0] * base_size / min(_img.shape[:2]))) 55 | ) 56 | _img = image_preprocess(_img) 57 | 58 | score_vec = np.zeros(class_num, dtype=np.float32) 59 | crops = [] 60 | if crop_num == 1: 61 | crops.append(center_crop(_img)) 62 | elif crop_num == 12: 63 | crops.extend(mirror_crop(_img)) 64 | elif crop_num == 144: 65 | crops.extend(multi_crop(_img)) 66 | else: 67 | crops.append(center_crop(_img)) 68 | 69 | iter_num = int(len(crops) / batch_size) 70 | for j in xrange(iter_num): 71 | score_vec += caffe_process(np.asarray(crops, dtype=np.float32)[j*batch_size:(j+1)*batch_size]) 72 | score_index = 
(-score_vec / len(crops)).argsort() 73 | 74 | print 'Testing image: ' + str(i + 1) + '/' + str(eval_len - skip_num) + ' ' + str(score_index[0]) + '/' + str( 75 | ground_truth[i + skip_num]), 76 | for j in xrange(len(top_k)): 77 | if ground_truth[i + skip_num] in score_index[:top_k[j]]: 78 | accuracy[j] += 1 79 | tmp_acc = float(accuracy[j]) / float(i + 1) 80 | if top_k[j] == 1: 81 | print '\ttop_' + str(top_k[j]) + ':' + str(tmp_acc), 82 | else: 83 | print 'top_' + str(top_k[j]) + ':' + str(tmp_acc) 84 | 85 | end_time = datetime.datetime.now() 86 | w = open(save_log, 'w') 87 | s1 = 'Evaluation process ends at: {}. \nTime cost is: {}. '.format(str(end_time), str(end_time - start_time)) 88 | s2 = '\nThe model is: {}. \nThe val file is: {}. \n{} images has been tested, crop_num is: {}, base_size is: {}, ' \ 89 | 'crop_size is: {}.'.format(model_weights, val_file, str(eval_len), str(crop_num), str(base_size), str(crop_size)) 90 | s3 = '\nThe mean value is: ({}, {}, {}).'.format(str(mean_value[0]), str(mean_value[1]), str(mean_value[2])) 91 | s4 = '' 92 | for i in xrange(len(top_k)): 93 | _acc = float(accuracy[i]) / float(eval_len) 94 | s4 += '\nAccuracy of top_{} is: {}; correct num is {}.'.format(str(top_k[i]), str(_acc), str(int(accuracy[i]))) 95 | print s1, s2, s3, s4 96 | w.write(s1 + s2 + s3 + s4) 97 | w.close() 98 | 99 | 100 | def image_preprocess(img): 101 | b, g, r = cv2.split(img) 102 | return cv2.merge([(b-mean_value[0])/std[0], (g-mean_value[1])/std[1], (r-mean_value[2])/std[2]]) 103 | 104 | 105 | def center_crop(img): # single crop 106 | short_edge = min(img.shape[:2]) 107 | if short_edge < crop_size: 108 | return 109 | yy = int((img.shape[0] - crop_size) / 2) 110 | xx = int((img.shape[1] - crop_size) / 2) 111 | return img[yy: yy + crop_size, xx: xx + crop_size] 112 | 113 | 114 | def over_sample(img): # 12 crops of image 115 | short_edge = min(img.shape[:2]) 116 | if short_edge < crop_size: 117 | return 118 | yy = int((img.shape[0] - crop_size) / 2) 119 | xx = int((img.shape[1] - crop_size) / 2) 120 | sample_list = [img[:crop_size, :crop_size], img[-crop_size:, -crop_size:], img[:crop_size, -crop_size:], 121 | img[-crop_size:, :crop_size], img[yy: yy + crop_size, xx: xx + crop_size], 122 | cv2.resize(img, (crop_size, crop_size))] 123 | return sample_list 124 | 125 | 126 | def mirror_crop(img): # 12*len(size_list) crops 127 | crop_list = [] 128 | img_resize = cv2.resize(img, (base_size, base_size)) 129 | mirror = img_resize[:, ::-1] 130 | crop_list.extend(over_sample(img_resize)) 131 | crop_list.extend(over_sample(mirror)) 132 | return crop_list 133 | 134 | 135 | def multi_crop(img): # 144(12*12) crops 136 | crop_list = [] 137 | size_list = [256, 288, 320, 352] # crop_size: 224 138 | # size_list = [270, 300, 330, 360] # crop_size: 235 139 | # size_list = [320, 352, 384, 416] # crop_size: 299 140 | # size_list = [352, 384, 416, 448] # crop_size: 320 141 | short_edge = min(img.shape[:2]) 142 | for i in size_list: 143 | img_resize = cv2.resize(img, (img.shape[1] * i / short_edge, img.shape[0] * i / short_edge)) 144 | yy = int((img_resize.shape[0] - i) / 2) 145 | xx = int((img_resize.shape[1] - i) / 2) 146 | for j in xrange(3): 147 | left_center_right = img_resize[yy * j: yy * j + i, xx * j: xx * j + i] 148 | mirror = left_center_right[:, ::-1] 149 | crop_list.extend(over_sample(left_center_right)) 150 | crop_list.extend(over_sample(mirror)) 151 | return crop_list 152 | 153 | 154 | def caffe_process(_input): 155 | _input = _input.transpose(0, 3, 1, 2) 156 | 
net.blobs['data'].reshape(*_input.shape) 157 | net.blobs['data'].data[...] = _input 158 | net.forward() 159 | 160 | return np.sum(net.blobs[prob_layer].data, axis=0) 161 | 162 | 163 | if __name__ == '__main__': 164 | eval_batch() 165 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/mobilenet/rpn_rcnn_deploys/rcnn_deploy_faster_voc_mobilenet-dw.prototxt: -------------------------------------------------------------------------------- 1 | input: "conv5_5/sep" 2 | input_shape { 3 | dim: 1 4 | dim: 512 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "conv5_5/sep" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | 29 | layer { 30 | name: "conv5_6/dw" 31 | type: "ConvolutionDepthwise" 32 | bottom: "roi_pool" 33 | top: "conv5_6/dw" 34 | param { 35 | lr_mult: 1 36 | decay_mult: 1 37 | } 38 | convolution_param { 39 | num_output: 512 40 | bias_term: false 41 | pad: 1 42 | kernel_size: 3 43 | group: 512 44 | #engine: CAFFE 45 | stride: 2 46 | weight_filler { 47 | type: "msra" 48 | } 49 | } 50 | } 51 | layer { 52 | name: "conv5_6/dw/bn" 53 | type: "BatchNorm" 54 | bottom: "conv5_6/dw" 55 | top: "conv5_6/dw" 56 | param { 57 | lr_mult: 0 58 | decay_mult: 0 59 | } 60 | param { 61 | lr_mult: 0 62 | decay_mult: 0 63 | } 64 | param { 65 | lr_mult: 0 66 | decay_mult: 0 67 | } 68 | } 69 | layer { 70 | name: "conv5_6/dw/scale" 71 | type: "Scale" 72 | bottom: "conv5_6/dw" 73 | top: "conv5_6/dw" 74 | scale_param { 75 | filler { 76 | value: 1 77 | } 78 | bias_term: true 79 | bias_filler { 80 | value: 0 81 | } 82 | } 83 | param { 84 | lr_mult: 0.0 85 | decay_mult: 0.0 86 | } 87 | param { 88 | lr_mult: 0.0 89 | decay_mult: 0.0 90 | } 91 | } 92 | layer { 93 | name: "relu5_6/dw" 94 | type: "ReLU" 95 | bottom: "conv5_6/dw" 96 | top: "conv5_6/dw" 97 | } 98 | layer { 99 | name: "conv5_6/sep" 100 | type: "Convolution" 101 | bottom: "conv5_6/dw" 102 | top: "conv5_6/sep" 103 | param { 104 | lr_mult: 1 105 | decay_mult: 1 106 | } 107 | convolution_param { 108 | num_output: 1024 109 | bias_term: false 110 | pad: 0 111 | kernel_size: 1 112 | stride: 1 113 | weight_filler { 114 | type: "msra" 115 | } 116 | } 117 | } 118 | layer { 119 | name: "conv5_6/sep/bn" 120 | type: "BatchNorm" 121 | bottom: "conv5_6/sep" 122 | top: "conv5_6/sep" 123 | param { 124 | lr_mult: 0 125 | decay_mult: 0 126 | } 127 | param { 128 | lr_mult: 0 129 | decay_mult: 0 130 | } 131 | param { 132 | lr_mult: 0 133 | decay_mult: 0 134 | } 135 | } 136 | layer { 137 | name: "conv5_6/sep/scale" 138 | type: "Scale" 139 | bottom: "conv5_6/sep" 140 | top: "conv5_6/sep" 141 | scale_param { 142 | filler { 143 | value: 1 144 | } 145 | bias_term: true 146 | bias_filler { 147 | value: 0 148 | } 149 | } 150 | param { 151 | lr_mult: 0.0 152 | decay_mult: 0.0 153 | } 154 | param { 155 | lr_mult: 0.0 156 | decay_mult: 0.0 157 | } 158 | } 159 | layer { 160 | name: "relu5_6/sep" 161 | type: "ReLU" 162 | bottom: "conv5_6/sep" 163 | top: "conv5_6/sep" 164 | } 165 | layer { 166 | name: "conv6/dw" 167 | type: "ConvolutionDepthwise" 168 | bottom: "conv5_6/sep" 169 | top: "conv6/dw" 170 | param { 171 | lr_mult: 1 172 | decay_mult: 1 173 | } 174 | convolution_param { 175 | num_output: 1024 176 | bias_term: false 177 | pad: 1 178 | kernel_size: 3 
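    # Editorial note: the group value just below equals num_output, so each of
    # the 1024 input channels is convolved with its own single 3x3 filter -- a
    # depthwise convolution. The 1x1 "sep" convolution that follows mixes the
    # channels, so each dw + sep pair forms one depthwise-separable block with
    # roughly 8-9x fewer multiply-adds than a dense 3x3 convolution at this width.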
179 | group: 1024 180 | #engine: CAFFE 181 | stride: 1 182 | weight_filler { 183 | type: "msra" 184 | } 185 | } 186 | } 187 | layer { 188 | name: "conv6/dw/bn" 189 | type: "BatchNorm" 190 | bottom: "conv6/dw" 191 | top: "conv6/dw" 192 | param { 193 | lr_mult: 0 194 | decay_mult: 0 195 | } 196 | param { 197 | lr_mult: 0 198 | decay_mult: 0 199 | } 200 | param { 201 | lr_mult: 0 202 | decay_mult: 0 203 | } 204 | } 205 | layer { 206 | name: "conv6/dw/scale" 207 | type: "Scale" 208 | bottom: "conv6/dw" 209 | top: "conv6/dw" 210 | scale_param { 211 | filler { 212 | value: 1 213 | } 214 | bias_term: true 215 | bias_filler { 216 | value: 0 217 | } 218 | } 219 | param { 220 | lr_mult: 0.0 221 | decay_mult: 0.0 222 | } 223 | param { 224 | lr_mult: 0.0 225 | decay_mult: 0.0 226 | } 227 | } 228 | layer { 229 | name: "relu6/dw" 230 | type: "ReLU" 231 | bottom: "conv6/dw" 232 | top: "conv6/dw" 233 | } 234 | layer { 235 | name: "conv6/sep" 236 | type: "Convolution" 237 | bottom: "conv6/dw" 238 | top: "conv6/sep" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | convolution_param { 244 | num_output: 1024 245 | bias_term: false 246 | pad: 0 247 | kernel_size: 1 248 | stride: 1 249 | weight_filler { 250 | type: "msra" 251 | } 252 | } 253 | } 254 | layer { 255 | name: "conv6/sep/bn" 256 | type: "BatchNorm" 257 | bottom: "conv6/sep" 258 | top: "conv6/sep" 259 | param { 260 | lr_mult: 0 261 | decay_mult: 0 262 | } 263 | param { 264 | lr_mult: 0 265 | decay_mult: 0 266 | } 267 | param { 268 | lr_mult: 0 269 | decay_mult: 0 270 | } 271 | } 272 | layer { 273 | name: "conv6/sep/scale" 274 | type: "Scale" 275 | bottom: "conv6/sep" 276 | top: "conv6/sep" 277 | scale_param { 278 | filler { 279 | value: 1 280 | } 281 | bias_term: true 282 | bias_filler { 283 | value: 0 284 | } 285 | } 286 | param { 287 | lr_mult: 0.0 288 | decay_mult: 0.0 289 | } 290 | param { 291 | lr_mult: 0.0 292 | decay_mult: 0.0 293 | } 294 | } 295 | layer { 296 | name: "relu6/sep" 297 | type: "ReLU" 298 | bottom: "conv6/sep" 299 | top: "conv6/sep" 300 | } 301 | layer { 302 | name: "pool6" 303 | type: "Pooling" 304 | bottom: "conv6/sep" 305 | top: "pool6" 306 | pooling_param { 307 | pool: AVE 308 | global_pooling: true 309 | } 310 | } 311 | layer { 312 | name: "cls_score" 313 | type: "InnerProduct" 314 | bottom: "pool6" 315 | top: "cls_score" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | inner_product_param { 325 | num_output: 21 326 | weight_filler { 327 | type: "msra" 328 | std: 0.01 329 | } 330 | bias_filler { 331 | type: "constant" 332 | value: 0 333 | } 334 | } 335 | } 336 | layer { 337 | name: "bbox_pred" 338 | type: "InnerProduct" 339 | bottom: "pool6" 340 | top: "bbox_pred" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | inner_product_param { 350 | num_output: 84 351 | weight_filler { 352 | type: "msra" 353 | std: 0.01 354 | } 355 | bias_filler { 356 | type: "constant" 357 | value: 0 358 | } 359 | } 360 | } 361 | layer { 362 | name: "cls_prob" 363 | type: "Softmax" 364 | bottom: "cls_score" 365 | top: "cls_prob" 366 | } 367 | 368 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: 
"Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: "conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "conv3_3" 161 | type: "Convolution" 162 | bottom: "conv3_2" 163 | top: "conv3_3" 164 | convolution_param { 165 | bias_term: true 166 | num_output: 256 167 | pad: 1 168 | kernel_size: 3 169 | stride: 1 170 | } 171 | } 172 | 173 | layer { 174 | name: "relu3_3" 175 | type: "ReLU" 176 | bottom: "conv3_3" 177 | top: "conv3_3" 178 | } 179 | 180 | layer { 181 | name: "pool3" 182 | type: "Pooling" 183 | bottom: "conv3_3" 184 | top: "pool3" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 2 188 | stride: 2 189 | } 190 | } 191 | 192 | layer { 193 | name: "conv4_1" 194 | type: "Convolution" 195 | bottom: "pool3" 196 | top: "conv4_1" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_1" 208 | type: "ReLU" 209 | bottom: "conv4_1" 210 | top: "conv4_1" 211 | } 212 | 213 | layer { 214 | name: "conv4_2" 215 | type: "Convolution" 216 | bottom: "conv4_1" 217 | top: "conv4_2" 218 | convolution_param { 219 | bias_term: true 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 
3 223 | stride: 1 224 | } 225 | } 226 | 227 | layer { 228 | name: "relu4_2" 229 | type: "ReLU" 230 | bottom: "conv4_2" 231 | top: "conv4_2" 232 | } 233 | 234 | layer { 235 | name: "conv4_3" 236 | type: "Convolution" 237 | bottom: "conv4_2" 238 | top: "conv4_3" 239 | convolution_param { 240 | bias_term: true 241 | num_output: 512 242 | pad: 1 243 | kernel_size: 3 244 | stride: 1 245 | } 246 | } 247 | 248 | layer { 249 | name: "relu4_3" 250 | type: "ReLU" 251 | bottom: "conv4_3" 252 | top: "conv4_3" 253 | } 254 | 255 | layer { 256 | name: "pool4" 257 | type: "Pooling" 258 | bottom: "conv4_3" 259 | top: "pool4" 260 | pooling_param { 261 | pool: MAX 262 | kernel_size: 2 263 | stride: 2 264 | } 265 | } 266 | 267 | layer { 268 | name: "conv5_1" 269 | type: "Convolution" 270 | bottom: "pool4" 271 | top: "conv5_1" 272 | convolution_param { 273 | bias_term: true 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | stride: 1 278 | } 279 | } 280 | 281 | layer { 282 | name: "relu5_1" 283 | type: "ReLU" 284 | bottom: "conv5_1" 285 | top: "conv5_1" 286 | } 287 | 288 | layer { 289 | name: "conv5_2" 290 | type: "Convolution" 291 | bottom: "conv5_1" 292 | top: "conv5_2" 293 | convolution_param { 294 | bias_term: true 295 | num_output: 512 296 | pad: 1 297 | kernel_size: 3 298 | stride: 1 299 | } 300 | } 301 | 302 | layer { 303 | name: "relu5_2" 304 | type: "ReLU" 305 | bottom: "conv5_2" 306 | top: "conv5_2" 307 | } 308 | 309 | layer { 310 | name: "conv5_3" 311 | type: "Convolution" 312 | bottom: "conv5_2" 313 | top: "conv5_3" 314 | convolution_param { 315 | bias_term: true 316 | num_output: 512 317 | pad: 1 318 | kernel_size: 3 319 | stride: 1 320 | } 321 | } 322 | 323 | layer { 324 | name: "relu5_3" 325 | type: "ReLU" 326 | bottom: "conv5_3" 327 | top: "conv5_3" 328 | } 329 | 330 | layer { 331 | name: "pool5" 332 | type: "Pooling" 333 | bottom: "conv5_3" 334 | top: "pool5" 335 | pooling_param { 336 | pool: MAX 337 | kernel_size: 2 338 | stride: 2 339 | } 340 | } 341 | 342 | layer { 343 | bottom: "pool5" 344 | top: "fc6" 345 | name: "fc6" 346 | type: "InnerProduct" 347 | inner_product_param { 348 | num_output: 4096 349 | } 350 | } 351 | 352 | layer { 353 | name: "relu6" 354 | type: "ReLU" 355 | bottom: "fc6" 356 | top: "fc6" 357 | } 358 | 359 | layer { 360 | name: "dropout6" 361 | type: "Dropout" 362 | bottom: "fc6" 363 | top: "fc6" 364 | dropout_param { 365 | dropout_ratio: 0.5 366 | } 367 | } 368 | 369 | layer { 370 | bottom: "fc6" 371 | top: "fc7" 372 | name: "fc7" 373 | type: "InnerProduct" 374 | inner_product_param { 375 | num_output: 4096 376 | } 377 | } 378 | 379 | layer { 380 | name: "relu7" 381 | type: "ReLU" 382 | bottom: "fc7" 383 | top: "fc7" 384 | } 385 | 386 | layer { 387 | name: "dropout7" 388 | type: "Dropout" 389 | bottom: "fc7" 390 | top: "fc7" 391 | dropout_param { 392 | dropout_ratio: 0.5 393 | } 394 | } 395 | 396 | layer { 397 | bottom: "fc7" 398 | top: "classifier" 399 | name: "classifier" 400 | type: "InnerProduct" 401 | inner_product_param { 402 | num_output: 1000 403 | } 404 | } 405 | 406 | layer { 407 | name: "prob" 408 | type: "Softmax" 409 | bottom: "classifier" 410 | top: "prob" 411 | } 412 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg16-tf.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: 
"conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | 23 | layer { 24 | name: "relu1_1" 25 | type: "ReLU" 26 | bottom: "conv1_1" 27 | top: "conv1_1" 28 | } 29 | 30 | layer { 31 | name: "conv1_2" 32 | type: "Convolution" 33 | bottom: "conv1_1" 34 | top: "conv1_2" 35 | convolution_param { 36 | bias_term: true 37 | num_output: 64 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | } 42 | } 43 | 44 | layer { 45 | name: "relu1_2" 46 | type: "ReLU" 47 | bottom: "conv1_2" 48 | top: "conv1_2" 49 | } 50 | 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "conv1_2" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 2 59 | stride: 2 60 | } 61 | } 62 | 63 | layer { 64 | name: "conv2_1" 65 | type: "Convolution" 66 | bottom: "pool1" 67 | top: "conv2_1" 68 | convolution_param { 69 | bias_term: true 70 | num_output: 128 71 | pad: 1 72 | kernel_size: 3 73 | stride: 1 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu2_1" 79 | type: "ReLU" 80 | bottom: "conv2_1" 81 | top: "conv2_1" 82 | } 83 | 84 | layer { 85 | name: "conv2_2" 86 | type: "Convolution" 87 | bottom: "conv2_1" 88 | top: "conv2_2" 89 | convolution_param { 90 | bias_term: true 91 | num_output: 128 92 | pad: 1 93 | kernel_size: 3 94 | stride: 1 95 | } 96 | } 97 | 98 | layer { 99 | name: "relu2_2" 100 | type: "ReLU" 101 | bottom: "conv2_2" 102 | top: "conv2_2" 103 | } 104 | 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2_2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | 117 | layer { 118 | name: "conv3_1" 119 | type: "Convolution" 120 | bottom: "pool2" 121 | top: "conv3_1" 122 | convolution_param { 123 | bias_term: true 124 | num_output: 256 125 | pad: 1 126 | kernel_size: 3 127 | stride: 1 128 | } 129 | } 130 | 131 | layer { 132 | name: "relu3_1" 133 | type: "ReLU" 134 | bottom: "conv3_1" 135 | top: "conv3_1" 136 | } 137 | 138 | layer { 139 | name: "conv3_2" 140 | type: "Convolution" 141 | bottom: "conv3_1" 142 | top: "conv3_2" 143 | convolution_param { 144 | bias_term: true 145 | num_output: 256 146 | pad: 1 147 | kernel_size: 3 148 | stride: 1 149 | } 150 | } 151 | 152 | layer { 153 | name: "relu3_2" 154 | type: "ReLU" 155 | bottom: "conv3_2" 156 | top: "conv3_2" 157 | } 158 | 159 | layer { 160 | name: "conv3_3" 161 | type: "Convolution" 162 | bottom: "conv3_2" 163 | top: "conv3_3" 164 | convolution_param { 165 | bias_term: true 166 | num_output: 256 167 | pad: 1 168 | kernel_size: 3 169 | stride: 1 170 | } 171 | } 172 | 173 | layer { 174 | name: "relu3_3" 175 | type: "ReLU" 176 | bottom: "conv3_3" 177 | top: "conv3_3" 178 | } 179 | 180 | layer { 181 | name: "pool3" 182 | type: "Pooling" 183 | bottom: "conv3_3" 184 | top: "pool3" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 2 188 | stride: 2 189 | } 190 | } 191 | 192 | layer { 193 | name: "conv4_1" 194 | type: "Convolution" 195 | bottom: "pool3" 196 | top: "conv4_1" 197 | convolution_param { 198 | bias_term: true 199 | num_output: 512 200 | pad: 1 201 | kernel_size: 3 202 | stride: 1 203 | } 204 | } 205 | 206 | layer { 207 | name: "relu4_1" 208 | type: "ReLU" 209 | bottom: "conv4_1" 210 | top: "conv4_1" 211 | } 212 | 213 | layer { 214 | name: "conv4_2" 215 | type: "Convolution" 216 | bottom: "conv4_1" 217 | top: "conv4_2" 218 | convolution_param { 219 | bias_term: true 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 3 223 | stride: 1 224 | } 225 | } 226 | 227 
| layer { 228 | name: "relu4_2" 229 | type: "ReLU" 230 | bottom: "conv4_2" 231 | top: "conv4_2" 232 | } 233 | 234 | layer { 235 | name: "conv4_3" 236 | type: "Convolution" 237 | bottom: "conv4_2" 238 | top: "conv4_3" 239 | convolution_param { 240 | bias_term: true 241 | num_output: 512 242 | pad: 1 243 | kernel_size: 3 244 | stride: 1 245 | } 246 | } 247 | 248 | layer { 249 | name: "relu4_3" 250 | type: "ReLU" 251 | bottom: "conv4_3" 252 | top: "conv4_3" 253 | } 254 | 255 | layer { 256 | name: "pool4" 257 | type: "Pooling" 258 | bottom: "conv4_3" 259 | top: "pool4" 260 | pooling_param { 261 | pool: MAX 262 | kernel_size: 2 263 | stride: 2 264 | } 265 | } 266 | 267 | layer { 268 | name: "conv5_1" 269 | type: "Convolution" 270 | bottom: "pool4" 271 | top: "conv5_1" 272 | convolution_param { 273 | bias_term: true 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | stride: 1 278 | } 279 | } 280 | 281 | layer { 282 | name: "relu5_1" 283 | type: "ReLU" 284 | bottom: "conv5_1" 285 | top: "conv5_1" 286 | } 287 | 288 | layer { 289 | name: "conv5_2" 290 | type: "Convolution" 291 | bottom: "conv5_1" 292 | top: "conv5_2" 293 | convolution_param { 294 | bias_term: true 295 | num_output: 512 296 | pad: 1 297 | kernel_size: 3 298 | stride: 1 299 | } 300 | } 301 | 302 | layer { 303 | name: "relu5_2" 304 | type: "ReLU" 305 | bottom: "conv5_2" 306 | top: "conv5_2" 307 | } 308 | 309 | layer { 310 | name: "conv5_3" 311 | type: "Convolution" 312 | bottom: "conv5_2" 313 | top: "conv5_3" 314 | convolution_param { 315 | bias_term: true 316 | num_output: 512 317 | pad: 1 318 | kernel_size: 3 319 | stride: 1 320 | } 321 | } 322 | 323 | layer { 324 | name: "relu5_3" 325 | type: "ReLU" 326 | bottom: "conv5_3" 327 | top: "conv5_3" 328 | } 329 | 330 | layer { 331 | name: "pool5" 332 | type: "Pooling" 333 | bottom: "conv5_3" 334 | top: "pool5" 335 | pooling_param { 336 | pool: MAX 337 | kernel_size: 2 338 | stride: 2 339 | } 340 | } 341 | 342 | layer { 343 | name: "fc6" 344 | type: "Convolution" 345 | bottom: "pool5" 346 | top: "fc6" 347 | convolution_param { 348 | bias_term: true 349 | num_output: 4096 350 | kernel_size: 7 351 | stride: 1 352 | } 353 | } 354 | 355 | layer { 356 | name: "relu6" 357 | type: "ReLU" 358 | bottom: "fc6" 359 | top: "fc6" 360 | } 361 | 362 | layer { 363 | name: "dropout6" 364 | type: "Dropout" 365 | bottom: "fc6" 366 | top: "fc6" 367 | dropout_param { 368 | dropout_ratio: 0.5 369 | } 370 | } 371 | 372 | layer { 373 | name: "fc7" 374 | type: "Convolution" 375 | bottom: "fc6" 376 | top: "fc7" 377 | convolution_param { 378 | bias_term: true 379 | num_output: 4096 380 | kernel_size: 1 381 | stride: 1 382 | } 383 | } 384 | 385 | layer { 386 | name: "relu7" 387 | type: "ReLU" 388 | bottom: "fc7" 389 | top: "fc7" 390 | } 391 | 392 | layer { 393 | name: "dropout7" 394 | type: "Dropout" 395 | bottom: "fc7" 396 | top: "fc7" 397 | dropout_param { 398 | dropout_ratio: 0.5 399 | } 400 | } 401 | 402 | layer { 403 | name: "fc8" 404 | type: "Convolution" 405 | bottom: "fc7" 406 | top: "fc8" 407 | convolution_param { 408 | bias_term: true 409 | num_output: 1000 410 | kernel_size: 1 411 | stride: 1 412 | } 413 | } 414 | 415 | layer { 416 | name: "reshape" 417 | type: "Reshape" 418 | bottom: "fc8" 419 | top: "reshape" 420 | reshape_param { 421 | shape { 422 | dim: 0 423 | dim: 0 424 | } 425 | } 426 | } 427 | 428 | layer { 429 | name: "prob" 430 | type: "Softmax" 431 | bottom: "reshape" 432 | top: "prob" 433 | } -------------------------------------------------------------------------------- 
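Editorial note on the -tf variant above: its classifier head is fully convolutional (fc6 is a 7x7 convolution over the 512x7x7 pool5 output; fc7 and fc8 are 1x1 convolutions), so the net also accepts inputs larger than 224x224; the trailing Reshape merely squeezes the 1x1 spatial dimensions before the Softmax. Converting pretrained InnerProduct weights into such kernels is a lossless reshape; a minimal numpy sketch (shapes taken from the prototxt above, variable and function names hypothetical):

import numpy as np

def fc_to_conv(fc_weight, in_channels, kh, kw):
    # An InnerProduct weight of shape (out, in_channels*kh*kw) holds the same
    # values as a convolution kernel of shape (out, in_channels, kh, kw).
    out_dim, in_dim = fc_weight.shape
    assert in_dim == in_channels * kh * kw
    return fc_weight.reshape(out_dim, in_channels, kh, kw)

fc6_w = np.zeros((4096, 512 * 7 * 7), dtype=np.float32)  # stand-in for net.params['fc6'][0].data
conv6_w = fc_to_conv(fc6_w, 512, 7, 7)   # -> (4096, 512, 7, 7)
fc7_w = np.zeros((4096, 4096), dtype=np.float32)
conv7_w = fc_to_conv(fc7_w, 4096, 1, 1)  # -> (4096, 4096, 1, 1)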
/det/faster_rcnn/models/pascal_voc/xception/rpn_rcnn_deploys/rcnn_deploy_faster_voc_xception-dw-merge-aligned.prototxt: -------------------------------------------------------------------------------- 1 | input: "xception11_elewise" 2 | input_shape { 3 | dim: 1 4 | dim: 728 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "xception11_elewise" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 8 24 | pooled_h: 8 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | name: "xception12_match_conv" 30 | type: "Convolution" 31 | bottom: "roi_pool" 32 | top: "xception12_match_conv" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | bias_term: false 39 | num_output: 1024 40 | pad: 0 41 | kernel_size: 1 42 | stride: 1 43 | } 44 | } 45 | layer { 46 | name: "xception12_match_conv_scale" 47 | type: "Scale" 48 | bottom: "xception12_match_conv" 49 | top: "xception12_match_conv" 50 | scale_param { 51 | bias_term: true 52 | } 53 | param { 54 | lr_mult: 0.0 55 | decay_mult: 0.0 56 | } 57 | param { 58 | lr_mult: 0.0 59 | decay_mult: 0.0 60 | } 61 | } 62 | layer { 63 | name: "xception12_relu" 64 | type: "ReLU" 65 | bottom: "roi_pool" 66 | top: "xception12_relu" 67 | } 68 | layer { 69 | name: "xception12_conv1_1" 70 | type: "ConvolutionDepthwise" 71 | bottom: "xception12_relu" 72 | top: "xception12_conv1_1" 73 | param { 74 | lr_mult: 1 75 | decay_mult: 1 76 | } 77 | convolution_param { 78 | bias_term: false 79 | num_output: 728 80 | group: 728 81 | pad: 1 82 | kernel_size: 3 83 | stride: 1 84 | } 85 | } 86 | layer { 87 | name: "xception12_conv1_2" 88 | type: "Convolution" 89 | bottom: "xception12_conv1_1" 90 | top: "xception12_conv1_2" 91 | param { 92 | lr_mult: 1 93 | decay_mult: 1 94 | } 95 | convolution_param { 96 | bias_term: false 97 | num_output: 728 98 | pad: 0 99 | kernel_size: 1 100 | stride: 1 101 | } 102 | } 103 | layer { 104 | name: "xception12_conv1_scale" 105 | type: "Scale" 106 | bottom: "xception12_conv1_2" 107 | top: "xception12_conv1_2" 108 | scale_param { 109 | bias_term: true 110 | } 111 | param { 112 | lr_mult: 0.0 113 | decay_mult: 0.0 114 | } 115 | param { 116 | lr_mult: 0.0 117 | decay_mult: 0.0 118 | } 119 | } 120 | layer { 121 | name: "xception12_conv1_relu" 122 | type: "ReLU" 123 | bottom: "xception12_conv1_2" 124 | top: "xception12_conv1_2" 125 | } 126 | layer { 127 | name: "xception12_conv2_1" 128 | type: "ConvolutionDepthwise" 129 | bottom: "xception12_conv1_2" 130 | top: "xception12_conv2_1" 131 | param { 132 | lr_mult: 1 133 | decay_mult: 1 134 | } 135 | convolution_param { 136 | bias_term: false 137 | num_output: 728 138 | group: 728 139 | pad: 1 140 | kernel_size: 3 141 | stride: 1 142 | } 143 | } 144 | layer { 145 | name: "xception12_conv2_2" 146 | type: "Convolution" 147 | bottom: "xception12_conv2_1" 148 | top: "xception12_conv2_2" 149 | param { 150 | lr_mult: 1 151 | decay_mult: 1 152 | } 153 | convolution_param { 154 | bias_term: false 155 | num_output: 1024 156 | pad: 0 157 | kernel_size: 1 158 | stride: 1 159 | } 160 | } 161 | layer { 162 | name: "xception12_conv2_scale" 163 | type: "Scale" 164 | bottom: "xception12_conv2_2" 165 | top: "xception12_conv2_2" 166 | scale_param { 167 | bias_term: true 168 | } 169 | param { 170 | lr_mult: 0.0 171 | decay_mult: 0.0 172 | } 173 | param { 174 | lr_mult: 0.0 175 | decay_mult: 0.0 176 | } 
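    # Editorial note: the "-merge" in these deploy names appears to mean that the
    # training net's BatchNorm layers were folded offline into Scale layers
    # (scale = gamma / sqrt(var + eps), bias = beta - gamma * mean / sqrt(var + eps)),
    # which is why every Scale here carries bias_term: true and frozen
    # lr_mult: 0 parameters.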
177 | } 178 | layer { 179 | name: "xception12_pool" 180 | type: "Pooling" 181 | bottom: "xception12_conv2_2" 182 | top: "xception12_pool" 183 | pooling_param { 184 | pool: MAX 185 | kernel_size: 3 186 | stride: 1 187 | pad: 1 188 | ceil_mode: false 189 | } 190 | } 191 | layer { 192 | name: "xception12_elewise" 193 | type: "Eltwise" 194 | bottom: "xception12_match_conv" 195 | bottom: "xception12_pool" 196 | top: "xception12_elewise" 197 | eltwise_param { 198 | operation: SUM 199 | } 200 | } 201 | layer { 202 | name: "conv3_1" 203 | type: "ConvolutionDepthwise" 204 | bottom: "xception12_elewise" 205 | top: "conv3_1" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | convolution_param { 211 | bias_term: false 212 | num_output: 1024 213 | group: 1024 214 | pad: 1 215 | kernel_size: 3 216 | stride: 1 217 | } 218 | } 219 | layer { 220 | name: "conv3_2" 221 | type: "Convolution" 222 | bottom: "conv3_1" 223 | top: "conv3_2" 224 | param { 225 | lr_mult: 1 226 | decay_mult: 1 227 | } 228 | convolution_param { 229 | bias_term: false 230 | num_output: 1536 231 | pad: 0 232 | kernel_size: 1 233 | stride: 1 234 | } 235 | } 236 | layer { 237 | name: "conv3_scale" 238 | type: "Scale" 239 | bottom: "conv3_2" 240 | top: "conv3_2" 241 | scale_param { 242 | bias_term: true 243 | } 244 | param { 245 | lr_mult: 0.0 246 | decay_mult: 0.0 247 | } 248 | param { 249 | lr_mult: 0.0 250 | decay_mult: 0.0 251 | } 252 | } 253 | layer { 254 | name: "conv3_relu" 255 | type: "ReLU" 256 | bottom: "conv3_2" 257 | top: "conv3_2" 258 | } 259 | layer { 260 | name: "conv4_1" 261 | type: "ConvolutionDepthwise" 262 | bottom: "conv3_2" 263 | top: "conv4_1" 264 | param { 265 | lr_mult: 1 266 | decay_mult: 1 267 | } 268 | convolution_param { 269 | bias_term: false 270 | num_output: 1536 271 | group: 1536 272 | pad: 1 273 | kernel_size: 3 274 | stride: 1 275 | } 276 | } 277 | layer { 278 | name: "conv4_2" 279 | type: "Convolution" 280 | bottom: "conv4_1" 281 | top: "conv4_2" 282 | param { 283 | lr_mult: 1 284 | decay_mult: 1 285 | } 286 | convolution_param { 287 | bias_term: false 288 | num_output: 2048 289 | pad: 0 290 | kernel_size: 1 291 | stride: 1 292 | } 293 | } 294 | layer { 295 | name: "conv4_scale" 296 | type: "Scale" 297 | bottom: "conv4_2" 298 | top: "conv4_2" 299 | scale_param { 300 | bias_term: true 301 | } 302 | param { 303 | lr_mult: 0.0 304 | decay_mult: 0.0 305 | } 306 | param { 307 | lr_mult: 0.0 308 | decay_mult: 0.0 309 | } 310 | } 311 | layer { 312 | name: "conv4_relu" 313 | type: "ReLU" 314 | bottom: "conv4_2" 315 | top: "conv4_2" 316 | } 317 | layer { 318 | name: "pool_ave" 319 | type: "Pooling" 320 | bottom: "conv4_2" 321 | top: "pool_ave" 322 | pooling_param { 323 | global_pooling : true 324 | pool: AVE 325 | } 326 | } 327 | layer { 328 | name: "cls_score" 329 | type: "InnerProduct" 330 | bottom: "pool_ave" 331 | top: "cls_score" 332 | param { 333 | lr_mult: 1 334 | decay_mult: 1 335 | } 336 | param { 337 | lr_mult: 2 338 | decay_mult: 0 339 | } 340 | inner_product_param { 341 | num_output: 21 342 | weight_filler { 343 | type: "msra" 344 | std: 0.01 345 | } 346 | bias_filler { 347 | type: "constant" 348 | value: 0 349 | } 350 | } 351 | } 352 | layer { 353 | name: "bbox_pred" 354 | type: "InnerProduct" 355 | bottom: "pool_ave" 356 | top: "bbox_pred" 357 | param { 358 | lr_mult: 1 359 | decay_mult: 1 360 | } 361 | param { 362 | lr_mult: 2 363 | decay_mult: 0 364 | } 365 | inner_product_param { 366 | num_output: 84 367 | weight_filler { 368 | type: "msra" 369 | std: 0.01 370 | } 371 | bias_filler { 
372 | type: "constant" 373 | value: 0 374 | } 375 | } 376 | } 377 | layer { 378 | name: "cls_prob" 379 | type: "Softmax" 380 | bottom: "cls_score" 381 | top: "cls_prob" 382 | } 383 | 384 | 385 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg19-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | layer { 23 | name: "relu1_1" 24 | type: "ReLU" 25 | bottom: "conv1_1" 26 | top: "conv1_1" 27 | } 28 | 29 | layer { 30 | name: "conv1_2" 31 | type: "Convolution" 32 | bottom: "conv1_1" 33 | top: "conv1_2" 34 | convolution_param { 35 | bias_term: true 36 | num_output: 64 37 | pad: 1 38 | kernel_size: 3 39 | stride: 1 40 | } 41 | } 42 | layer { 43 | name: "relu1_2" 44 | type: "ReLU" 45 | bottom: "conv1_2" 46 | top: "conv1_2" 47 | } 48 | 49 | layer { 50 | name: "pool1" 51 | type: "Pooling" 52 | bottom: "conv1_2" 53 | top: "pool1" 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 2 57 | stride: 2 58 | } 59 | } 60 | 61 | layer { 62 | name: "conv2_1" 63 | type: "Convolution" 64 | bottom: "pool1" 65 | top: "conv2_1" 66 | convolution_param { 67 | bias_term: true 68 | num_output: 128 69 | pad: 1 70 | kernel_size: 3 71 | stride: 1 72 | } 73 | } 74 | layer { 75 | name: "relu2_1" 76 | type: "ReLU" 77 | bottom: "conv2_1" 78 | top: "conv2_1" 79 | } 80 | 81 | layer { 82 | name: "conv2_2" 83 | type: "Convolution" 84 | bottom: "conv2_1" 85 | top: "conv2_2" 86 | convolution_param { 87 | bias_term: true 88 | num_output: 128 89 | pad: 1 90 | kernel_size: 3 91 | stride: 1 92 | } 93 | } 94 | layer { 95 | name: "relu2_2" 96 | type: "ReLU" 97 | bottom: "conv2_2" 98 | top: "conv2_2" 99 | } 100 | 101 | layer { 102 | name: "pool2" 103 | type: "Pooling" 104 | bottom: "conv2_2" 105 | top: "pool2" 106 | pooling_param { 107 | pool: MAX 108 | kernel_size: 2 109 | stride: 2 110 | } 111 | } 112 | 113 | layer { 114 | name: "conv3_1" 115 | type: "Convolution" 116 | bottom: "pool2" 117 | top: "conv3_1" 118 | convolution_param { 119 | bias_term: true 120 | num_output: 256 121 | pad: 1 122 | kernel_size: 3 123 | stride: 1 124 | } 125 | } 126 | layer { 127 | name: "relu3_1" 128 | type: "ReLU" 129 | bottom: "conv3_1" 130 | top: "conv3_1" 131 | } 132 | 133 | layer { 134 | name: "conv3_2" 135 | type: "Convolution" 136 | bottom: "conv3_1" 137 | top: "conv3_2" 138 | convolution_param { 139 | bias_term: true 140 | num_output: 256 141 | pad: 1 142 | kernel_size: 3 143 | stride: 1 144 | } 145 | } 146 | layer { 147 | name: "relu3_2" 148 | type: "ReLU" 149 | bottom: "conv3_2" 150 | top: "conv3_2" 151 | } 152 | 153 | layer { 154 | name: "conv3_3" 155 | type: "Convolution" 156 | bottom: "conv3_2" 157 | top: "conv3_3" 158 | convolution_param { 159 | bias_term: true 160 | num_output: 256 161 | pad: 1 162 | kernel_size: 3 163 | stride: 1 164 | } 165 | } 166 | layer { 167 | name: "relu3_3" 168 | type: "ReLU" 169 | bottom: "conv3_3" 170 | top: "conv3_3" 171 | } 172 | layer { 173 | name: "conv3_4" 174 | type: "Convolution" 175 | bottom: "conv3_3" 176 | top: "conv3_4" 177 | convolution_param { 178 | bias_term: true 179 | num_output: 256 180 | pad: 1 181 | kernel_size: 3 182 | stride: 1 183 | } 184 | } 185 | layer { 186 | name: "relu3_4" 
187 | type: "ReLU" 188 | bottom: "conv3_4" 189 | top: "conv3_4" 190 | } 191 | 192 | layer { 193 | name: "pool3" 194 | type: "Pooling" 195 | bottom: "conv3_4" 196 | top: "pool3" 197 | pooling_param { 198 | pool: MAX 199 | kernel_size: 2 200 | stride: 2 201 | } 202 | } 203 | 204 | layer { 205 | name: "conv4_1" 206 | type: "Convolution" 207 | bottom: "pool3" 208 | top: "conv4_1" 209 | convolution_param { 210 | bias_term: true 211 | num_output: 512 212 | pad: 1 213 | kernel_size: 3 214 | stride: 1 215 | } 216 | } 217 | layer { 218 | name: "relu4_1" 219 | type: "ReLU" 220 | bottom: "conv4_1" 221 | top: "conv4_1" 222 | } 223 | 224 | layer { 225 | name: "conv4_2" 226 | type: "Convolution" 227 | bottom: "conv4_1" 228 | top: "conv4_2" 229 | convolution_param { 230 | bias_term: true 231 | num_output: 512 232 | pad: 1 233 | kernel_size: 3 234 | stride: 1 235 | } 236 | } 237 | layer { 238 | name: "relu4_2" 239 | type: "ReLU" 240 | bottom: "conv4_2" 241 | top: "conv4_2" 242 | } 243 | 244 | layer { 245 | name: "conv4_3" 246 | type: "Convolution" 247 | bottom: "conv4_2" 248 | top: "conv4_3" 249 | convolution_param { 250 | bias_term: true 251 | num_output: 512 252 | pad: 1 253 | kernel_size: 3 254 | stride: 1 255 | } 256 | } 257 | layer { 258 | name: "relu4_3" 259 | type: "ReLU" 260 | bottom: "conv4_3" 261 | top: "conv4_3" 262 | } 263 | 264 | layer { 265 | name: "conv4_4" 266 | type: "Convolution" 267 | bottom: "conv4_3" 268 | top: "conv4_4" 269 | convolution_param { 270 | bias_term: true 271 | num_output: 512 272 | pad: 1 273 | kernel_size: 3 274 | stride: 1 275 | } 276 | } 277 | layer { 278 | name: "relu4_4" 279 | type: "ReLU" 280 | bottom: "conv4_4" 281 | top: "conv4_4" 282 | } 283 | 284 | layer { 285 | name: "pool4" 286 | type: "Pooling" 287 | bottom: "conv4_4" 288 | top: "pool4" 289 | pooling_param { 290 | pool: MAX 291 | kernel_size: 2 292 | stride: 2 293 | } 294 | } 295 | 296 | layer { 297 | name: "conv5_1" 298 | type: "Convolution" 299 | bottom: "pool4" 300 | top: "conv5_1" 301 | convolution_param { 302 | bias_term: true 303 | num_output: 512 304 | pad: 1 305 | kernel_size: 3 306 | stride: 1 307 | } 308 | } 309 | layer { 310 | name: "relu5_1" 311 | type: "ReLU" 312 | bottom: "conv5_1" 313 | top: "conv5_1" 314 | } 315 | 316 | layer { 317 | name: "conv5_2" 318 | type: "Convolution" 319 | bottom: "conv5_1" 320 | top: "conv5_2" 321 | convolution_param { 322 | bias_term: true 323 | num_output: 512 324 | pad: 1 325 | kernel_size: 3 326 | stride: 1 327 | } 328 | } 329 | layer { 330 | name: "relu5_2" 331 | type: "ReLU" 332 | bottom: "conv5_2" 333 | top: "conv5_2" 334 | } 335 | 336 | layer { 337 | name: "conv5_3" 338 | type: "Convolution" 339 | bottom: "conv5_2" 340 | top: "conv5_3" 341 | convolution_param { 342 | bias_term: true 343 | num_output: 512 344 | pad: 1 345 | kernel_size: 3 346 | stride: 1 347 | } 348 | } 349 | layer { 350 | name: "relu5_3" 351 | type: "ReLU" 352 | bottom: "conv5_3" 353 | top: "conv5_3" 354 | } 355 | 356 | layer { 357 | name: "conv5_4" 358 | type: "Convolution" 359 | bottom: "conv5_3" 360 | top: "conv5_4" 361 | convolution_param { 362 | bias_term: true 363 | num_output: 512 364 | pad: 1 365 | kernel_size: 3 366 | stride: 1 367 | } 368 | } 369 | layer { 370 | name: "relu5_4" 371 | type: "ReLU" 372 | bottom: "conv5_4" 373 | top: "conv5_4" 374 | } 375 | 376 | layer { 377 | name: "pool5" 378 | type: "Pooling" 379 | bottom: "conv5_4" 380 | top: "pool5" 381 | pooling_param { 382 | pool: MAX 383 | kernel_size: 2 384 | stride: 2 385 | } 386 | } 387 | 388 | layer { 389 | bottom: "pool5" 
390 | top: "fc6" 391 | name: "fc6" 392 | type: "InnerProduct" 393 | inner_product_param { 394 | num_output: 4096 395 | } 396 | } 397 | 398 | layer { 399 | name: "relu6" 400 | type: "ReLU" 401 | bottom: "fc6" 402 | top: "fc6" 403 | } 404 | 405 | layer { 406 | name: "dropout6" 407 | type: "Dropout" 408 | bottom: "fc6" 409 | top: "fc6" 410 | dropout_param { 411 | dropout_ratio: 0.5 412 | } 413 | } 414 | 415 | layer { 416 | bottom: "fc6" 417 | top: "fc7" 418 | name: "fc7" 419 | type: "InnerProduct" 420 | inner_product_param { 421 | num_output: 4096 422 | } 423 | } 424 | 425 | layer { 426 | name: "relu7" 427 | type: "ReLU" 428 | bottom: "fc7" 429 | top: "fc7" 430 | } 431 | 432 | layer { 433 | name: "dropout7" 434 | type: "Dropout" 435 | bottom: "fc7" 436 | top: "fc7" 437 | dropout_param { 438 | dropout_ratio: 0.5 439 | } 440 | } 441 | 442 | layer { 443 | bottom: "fc7" 444 | top: "classifier" 445 | name: "classifier" 446 | type: "InnerProduct" 447 | inner_product_param { 448 | num_output: 1000 449 | } 450 | } 451 | 452 | layer { 453 | name: "prob" 454 | type: "Softmax" 455 | bottom: "classifier" 456 | top: "prob" 457 | } 458 | -------------------------------------------------------------------------------- /seg/pspnet/tools/image_seg_data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | 4 | sys.path.append('/home/prmct/workspace/py-RFCN-priv/caffe-priv/python') 5 | 6 | import caffe 7 | 8 | import numpy as np 9 | 10 | 11 | class ImageSegDataLayer(caffe.Layer): 12 | def get_gpu_id(self, gpu_id=0): 13 | self.gpu_id = gpu_id 14 | if self.shuffle: 15 | np.random.seed(self.gpu_id) 16 | np.random.shuffle(self.indices) 17 | 18 | def setup(self, bottom, top): 19 | print self.param_str 20 | params = eval(self.param_str) 21 | 22 | self.color_factor = np.array(params.get('color_factor', (0.95, 1.05))) # (0.95, 1.05) 23 | self.contrast_factor = np.array(params.get('contrast_factor', (0.95, 1.05))) # (0.95, 1.05) 24 | self.brightness_factor = np.array(params.get('brightness_factor', (0.95, 1.05))) # (0.95, 1.05) 25 | self.mirror = params.get('mirror', True) 26 | self.gaussian_blur = params.get('gaussian_blur', True) 27 | self.scale_factor = np.array(params.get('scale_factor', (0.75, 2.0))) # (0.75, 2.0) 28 | self.rotation_factor = np.array(params.get('rotation_factor', (-10, 10))) # (-10, 10) 29 | 30 | self.crop_size = int(params.get('crop_size', 513)) 31 | self.ignore_label = int(params.get('ignore_label', 255)) 32 | self.mean = np.array(params.get('mean', (102.98, 115.947, 122.772)), dtype=np.float32) 33 | self.scale = float(params.get('scale', 1.0)) 34 | 35 | self.root_dir = params['root_dir'] 36 | self.source = params['source'] 37 | self.batch_size = int(params.get('batch_size', 1)) 38 | self.shuffle = params.get('shuffle', True) 39 | 40 | if len(top) != 2: 41 | raise Exception("Need to define two tops: data and label.") 42 | if len(bottom) != 0: 43 | raise Exception("Do not define a bottom.") # data layers have no bottoms 44 | if len(self.color_factor) != 2: 45 | raise Exception("'color_factor' must have 2 values for factor range.") 46 | if len(self.contrast_factor) != 2: 47 | raise Exception("'contrast_factor' must have 2 values for factor range.") 48 | if len(self.brightness_factor) != 2: 49 | raise Exception("'brightness_factor' must have 2 values for factor range.") 50 | if len(self.mean) != 3: 51 | raise Exception("'mean' must have 3 values for B G R.") 52 | if len(self.scale_factor) != 2: 53 | raise Exception("'scale_factor' must 
have 2 values for factor range.") 54 | if self.crop_size <= 0: 55 | raise Exception("'Need positive crop_size.") 56 | 57 | self.indices = open(self.source, 'r').read().splitlines() 58 | self.epoch_num = len(self.indices) 59 | self.idx = 0 60 | 61 | def reshape(self, bottom, top): 62 | top[0].reshape(self.batch_size, 3, self.crop_size, self.crop_size) # for images 63 | top[1].reshape(self.batch_size, 1, self.crop_size, self.crop_size) # for labels 64 | 65 | def forward(self, bottom, top): 66 | batch_img = [] 67 | batch_label = [] 68 | for _ in xrange(self.batch_size): 69 | _img = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[0])) 70 | _label = cv2.imread('{}{}'.format(self.root_dir, self.indices[self.idx].split(' ')[1]), 0) 71 | 72 | if _img.shape[:2] != _label.shape: 73 | raise Exception("Need to define two tops: data and label.") 74 | 75 | aug_img, aug_label = self.augmentation(_img, _label) 76 | batch_img.append(aug_img.transpose((2, 0, 1))) 77 | batch_label.append([aug_label]) 78 | 79 | self.idx += 1 80 | if self.idx == self.epoch_num: 81 | self.idx = 0 82 | if self.shuffle: 83 | np.random.seed(self.gpu_id) 84 | np.random.shuffle(self.indices) 85 | batch_img = np.asarray(batch_img) 86 | batch_label = np.asarray(batch_label) 87 | 88 | top[0].data[...] = batch_img 89 | top[1].data[...] = batch_label 90 | 91 | def backward(self, top, propagate_down, bottom): 92 | pass 93 | 94 | def augmentation(self, img, label): 95 | ori_h, ori_w = img.shape[:2] 96 | 97 | _color = 1.0 98 | _contrast = 1.0 99 | _brightness = 1.0 100 | 101 | if self.color_factor[0] != 0 and self.color_factor[1] != 0 and self.color_factor[0] < self.color_factor[1]: 102 | _color = np.random.randint(int(self.color_factor[0] * 100), 103 | int(self.color_factor[1] * 100)) / 100.0 104 | 105 | if self.contrast_factor[0] != 0 and self.contrast_factor[1] != 0 and self.contrast_factor[0] < \ 106 | self.contrast_factor[1]: 107 | _contrast = np.random.randint(int(self.contrast_factor[0] * 100), 108 | int(self.contrast_factor[1] * 100)) / 100.0 109 | 110 | if self.brightness_factor[0] != 0 and self.brightness_factor[1] != 0 and self.brightness_factor[0] < \ 111 | self.brightness_factor[1]: 112 | _brightness = np.random.randint(int(self.brightness_factor[0] * 100), 113 | int(self.brightness_factor[1] * 100)) / 100.0 114 | 115 | _HSV = np.dot(cv2.cvtColor(img, cv2.COLOR_BGR2HSV).reshape((-1, 3)), 116 | np.array([[_color, 0, 0], [0, _contrast, 0], [0, 0, _brightness]])) 117 | _HSV_H = np.where(_HSV < 255, _HSV, 255) 118 | img = cv2.cvtColor(np.uint8(_HSV_H.reshape((-1, img.shape[1], 3))), cv2.COLOR_HSV2BGR) 119 | 120 | if self.gaussian_blur: 121 | if not np.random.randint(0, 4): 122 | img = cv2.GaussianBlur(img, (3, 3), 0) 123 | 124 | img = np.asarray(img, dtype=np.float32) 125 | label = np.asarray(label, dtype=np.uint8) 126 | 127 | if self.mirror: 128 | if np.random.randint(0, 2): 129 | img = img[:, :: -1] 130 | label = label[:, :: -1] 131 | 132 | if self.scale_factor[0] != 0 and self.scale_factor[1] != 0 and self.scale_factor[0] < self.scale_factor[1]: 133 | _scale = np.random.randint(int(self.scale_factor[0] * 100), 134 | int(self.scale_factor[1] * 100)) / 100.0 135 | res_w = int(_scale * ori_w) 136 | res_h = int(_scale * ori_h) 137 | img = cv2.resize(img, (res_w, res_h)) 138 | label = cv2.resize(label, (res_w, res_h), interpolation=cv2.cv.CV_INTER_NN) 139 | 140 | if self.rotation_factor[0] != 0 and self.rotation_factor[1] != 0 and self.rotation_factor[0] < \ 141 | self.rotation_factor[1]: 142 | if 
-------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet38a/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet38a-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res15_eletwise" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res15_eletwise" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer { 29 | bottom: "roi_pool" 30 | top: "pool5" 31 | name: "pool5" 32 | type: "Pooling" 33 | pooling_param { 34 | kernel_size: 3 35 | pad: 0 36 | stride: 2 37 | pool: MAX 38 | } 39 | } 40 | layer { 41 | bottom: "pool5" 42 | top: "res16_scale" 43 | name: "res16_scale" 44 | type: "Scale" 45 | scale_param { 46 | bias_term: true 47 | } 48 | param { 49 | lr_mult: 0.0 50 | decay_mult: 0.0 51 | } 52 | param { 53 | lr_mult: 0.0 54 | decay_mult: 0.0 55 | } 56 | } 57 | layer { 58 | bottom: "res16_scale" 59 | top: "res16_scale" 60 | name: "res16_relu" 61 | type: "ReLU" 62 | } 63 | layer { 64 | bottom: "res16_scale" 65 | top: "res16_match_conv" 66 | name: "res16_match_conv" 67 | type: "Convolution" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | convolution_param { 73 | num_output: 2048 74 | kernel_size: 1 75 | pad: 0 76 | stride: 1 77 | bias_term: false 78 | } 79 | } 80 | layer { 81 | bottom: "res16_scale" 82 | top: "res16_conv1" 83 | name: "res16_conv1" 84 | type: "Convolution" 85 | param { 86 | lr_mult: 1 87 | decay_mult: 1 88 | } 89 | convolution_param { 90 | num_output: 512 91 | kernel_size: 1 92 | pad: 0 93 | stride: 1 94 | bias_term: false 95 | } 96 | } 97 | layer { 98 | bottom: "res16_conv1" 99 | top: "res16_conv1" 100 | name: "res16_conv1_scale" 101 | type: "Scale" 102 | scale_param { 103 | bias_term: true 104 | } 105 | param { 106 | lr_mult: 0.0 107 |
decay_mult: 0.0 108 | } 109 | param { 110 | lr_mult: 0.0 111 | decay_mult: 0.0 112 | } 113 | } 114 | layer { 115 | bottom: "res16_conv1" 116 | top: "res16_conv1" 117 | name: "res16_conv1_relu" 118 | type: "ReLU" 119 | } 120 | layer { 121 | bottom: "res16_conv1" 122 | top: "res16_conv2" 123 | name: "res16_conv2" 124 | type: "Convolution" 125 | param { 126 | lr_mult: 1 127 | decay_mult: 1 128 | } 129 | convolution_param { 130 | num_output: 1024 131 | kernel_size: 3 132 | pad: 1 133 | stride: 1 134 | bias_term: false 135 | } 136 | } 137 | layer { 138 | bottom: "res16_conv2" 139 | top: "res16_conv2" 140 | name: "res16_conv2_scale" 141 | type: "Scale" 142 | scale_param { 143 | bias_term: true 144 | } 145 | param { 146 | lr_mult: 0.0 147 | decay_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | decay_mult: 0.0 152 | } 153 | } 154 | layer { 155 | bottom: "res16_conv2" 156 | top: "res16_conv2" 157 | name: "res16_conv2_relu" 158 | type: "ReLU" 159 | } 160 | layer { 161 | bottom: "res16_conv2" 162 | top: "res16_conv3" 163 | name: "res16_conv3" 164 | type: "Convolution" 165 | param { 166 | lr_mult: 1 167 | decay_mult: 1 168 | } 169 | convolution_param { 170 | num_output: 2048 171 | kernel_size: 1 172 | pad: 0 173 | stride: 1 174 | bias_term: false 175 | } 176 | } 177 | layer { 178 | bottom: "res16_match_conv" 179 | bottom: "res16_conv3" 180 | top: "res16_eletwise" 181 | name: "res16_eletwise" 182 | type: "Eltwise" 183 | } 184 | layer { 185 | bottom: "res16_eletwise" 186 | top: "res17_scale" 187 | name: "res17_scale" 188 | type: "Scale" 189 | scale_param { 190 | bias_term: true 191 | } 192 | param { 193 | lr_mult: 0.0 194 | decay_mult: 0.0 195 | } 196 | param { 197 | lr_mult: 0.0 198 | decay_mult: 0.0 199 | } 200 | } 201 | layer { 202 | bottom: "res17_scale" 203 | top: "res17_scale" 204 | name: "res17_relu" 205 | type: "ReLU" 206 | } 207 | layer { 208 | bottom: "res17_scale" 209 | top: "res17_match_conv" 210 | name: "res17_match_conv" 211 | type: "Convolution" 212 | param { 213 | lr_mult: 1 214 | decay_mult: 1 215 | } 216 | convolution_param { 217 | num_output: 4096 218 | kernel_size: 1 219 | pad: 0 220 | stride: 1 221 | bias_term: false 222 | } 223 | } 224 | layer { 225 | bottom: "res17_scale" 226 | top: "res17_conv1" 227 | name: "res17_conv1" 228 | type: "Convolution" 229 | param { 230 | lr_mult: 1 231 | decay_mult: 1 232 | } 233 | convolution_param { 234 | num_output: 1024 235 | kernel_size: 1 236 | pad: 0 237 | stride: 1 238 | bias_term: false 239 | } 240 | } 241 | layer { 242 | bottom: "res17_conv1" 243 | top: "res17_conv1" 244 | name: "res17_conv1_scale" 245 | type: "Scale" 246 | scale_param { 247 | bias_term: true 248 | } 249 | param { 250 | lr_mult: 0.0 251 | decay_mult: 0.0 252 | } 253 | param { 254 | lr_mult: 0.0 255 | decay_mult: 0.0 256 | } 257 | } 258 | layer { 259 | bottom: "res17_conv1" 260 | top: "res17_conv1" 261 | name: "res17_conv1_relu" 262 | type: "ReLU" 263 | } 264 | layer { 265 | bottom: "res17_conv1" 266 | top: "res17_conv2" 267 | name: "res17_conv2" 268 | type: "Convolution" 269 | param { 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | convolution_param { 274 | num_output: 2048 275 | kernel_size: 3 276 | pad: 1 277 | stride: 1 278 | bias_term: false 279 | } 280 | } 281 | layer { 282 | bottom: "res17_conv2" 283 | top: "res17_conv2" 284 | name: "res17_conv2_scale" 285 | type: "Scale" 286 | scale_param { 287 | bias_term: true 288 | } 289 | param { 290 | lr_mult: 0.0 291 | decay_mult: 0.0 292 | } 293 | param { 294 | lr_mult: 0.0 295 | decay_mult: 0.0 296 | } 297 | } 298 
| layer { 299 | bottom: "res17_conv2" 300 | top: "res17_conv2" 301 | name: "res17_conv2_relu" 302 | type: "ReLU" 303 | } 304 | layer { 305 | bottom: "res17_conv2" 306 | top: "res17_conv3" 307 | name: "res17_conv3" 308 | type: "Convolution" 309 | param { 310 | lr_mult: 1 311 | decay_mult: 1 312 | } 313 | convolution_param { 314 | num_output: 4096 315 | kernel_size: 1 316 | pad: 0 317 | stride: 1 318 | bias_term: false 319 | } 320 | } 321 | layer { 322 | bottom: "res17_match_conv" 323 | bottom: "res17_conv3" 324 | top: "res17_eletwise" 325 | name: "res17_eletwise" 326 | type: "Eltwise" 327 | } 328 | layer { 329 | bottom: "res17_eletwise" 330 | top: "res17_eletwise" 331 | name: "res17_eletwise_scale" 332 | type: "Scale" 333 | scale_param { 334 | bias_term: true 335 | } 336 | param { 337 | lr_mult: 0.0 338 | decay_mult: 0.0 339 | } 340 | param { 341 | lr_mult: 0.0 342 | decay_mult: 0.0 343 | } 344 | } 345 | layer { 346 | bottom: "res17_eletwise" 347 | top: "res17_eletwise" 348 | name: "res17_eletwise_relu" 349 | type: "ReLU" 350 | } 351 | layer { 352 | bottom: "res17_eletwise" 353 | top: "pool_ave" 354 | name: "pool_ave" 355 | type: "Pooling" 356 | pooling_param { 357 | global_pooling : true 358 | pool: AVE 359 | } 360 | } 361 | layer { 362 | name: "cls_score" 363 | type: "InnerProduct" 364 | bottom: "pool_ave" 365 | top: "cls_score" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | inner_product_param { 375 | num_output: 21 376 | weight_filler { 377 | type: "msra" 378 | std: 0.01 379 | } 380 | bias_filler { 381 | type: "constant" 382 | value: 0 383 | } 384 | } 385 | } 386 | layer { 387 | name: "bbox_pred" 388 | type: "InnerProduct" 389 | bottom: "pool_ave" 390 | top: "bbox_pred" 391 | param { 392 | lr_mult: 1 393 | decay_mult: 1 394 | } 395 | param { 396 | lr_mult: 2 397 | decay_mult: 0 398 | } 399 | inner_product_param { 400 | num_output: 84 401 | weight_filler { 402 | type: "msra" 403 | std: 0.01 404 | } 405 | bias_filler { 406 | type: "constant" 407 | value: 0 408 | } 409 | } 410 | } 411 | layer { 412 | name: "cls_prob" 413 | type: "Softmax" 414 | bottom: "cls_score" 415 | top: "cls_prob" 416 | } 417 | 418 | -------------------------------------------------------------------------------- /cls/vgg/deploy_vgg13bn-pytorch.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | bias_term: true 16 | num_output: 64 17 | pad: 1 18 | kernel_size: 3 19 | stride: 1 20 | } 21 | } 22 | layer { 23 | bottom: "conv1_1" 24 | top: "conv1_1" 25 | name: "conv1_1_bn" 26 | type: "BatchNorm" 27 | batch_norm_param { 28 | use_global_stats: true 29 | } 30 | } 31 | layer { 32 | bottom: "conv1_1" 33 | top: "conv1_1" 34 | name: "conv1_1_scale" 35 | type: "Scale" 36 | scale_param { 37 | bias_term: true 38 | } 39 | } 40 | layer { 41 | name: "relu1_1" 42 | type: "ReLU" 43 | bottom: "conv1_1" 44 | top: "conv1_1" 45 | } 46 | 47 | layer { 48 | name: "conv1_2" 49 | type: "Convolution" 50 | bottom: "conv1_1" 51 | top: "conv1_2" 52 | convolution_param { 53 | bias_term: true 54 | num_output: 64 55 | pad: 1 56 | kernel_size: 3 57 | stride: 1 58 | } 59 | } 60 | layer { 61 | bottom: "conv1_2" 62 | top: "conv1_2" 63 | name: "conv1_2_bn" 64 | type: "BatchNorm" 65 | batch_norm_param { 66 | use_global_stats: 
true 67 | } 68 | } 69 | layer { 70 | bottom: "conv1_2" 71 | top: "conv1_2" 72 | name: "conv1_2_scale" 73 | type: "Scale" 74 | scale_param { 75 | bias_term: true 76 | } 77 | } 78 | layer { 79 | name: "relu1_2" 80 | type: "ReLU" 81 | bottom: "conv1_2" 82 | top: "conv1_2" 83 | } 84 | 85 | layer { 86 | name: "pool1" 87 | type: "Pooling" 88 | bottom: "conv1_2" 89 | top: "pool1" 90 | pooling_param { 91 | pool: MAX 92 | kernel_size: 2 93 | stride: 2 94 | } 95 | } 96 | 97 | layer { 98 | name: "conv2_1" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2_1" 102 | convolution_param { 103 | bias_term: true 104 | num_output: 128 105 | pad: 1 106 | kernel_size: 3 107 | stride: 1 108 | } 109 | } 110 | layer { 111 | bottom: "conv2_1" 112 | top: "conv2_1" 113 | name: "conv2_1_bn" 114 | type: "BatchNorm" 115 | batch_norm_param { 116 | use_global_stats: true 117 | } 118 | } 119 | layer { 120 | bottom: "conv2_1" 121 | top: "conv2_1" 122 | name: "conv2_1_scale" 123 | type: "Scale" 124 | scale_param { 125 | bias_term: true 126 | } 127 | } 128 | layer { 129 | name: "relu2_1" 130 | type: "ReLU" 131 | bottom: "conv2_1" 132 | top: "conv2_1" 133 | } 134 | 135 | layer { 136 | name: "conv2_2" 137 | type: "Convolution" 138 | bottom: "conv2_1" 139 | top: "conv2_2" 140 | convolution_param { 141 | bias_term: true 142 | num_output: 128 143 | pad: 1 144 | kernel_size: 3 145 | stride: 1 146 | } 147 | } 148 | layer { 149 | bottom: "conv2_2" 150 | top: "conv2_2" 151 | name: "conv2_2_bn" 152 | type: "BatchNorm" 153 | batch_norm_param { 154 | use_global_stats: true 155 | } 156 | } 157 | layer { 158 | bottom: "conv2_2" 159 | top: "conv2_2" 160 | name: "conv2_2_scale" 161 | type: "Scale" 162 | scale_param { 163 | bias_term: true 164 | } 165 | } 166 | layer { 167 | name: "relu2_2" 168 | type: "ReLU" 169 | bottom: "conv2_2" 170 | top: "conv2_2" 171 | } 172 | 173 | layer { 174 | name: "pool2" 175 | type: "Pooling" 176 | bottom: "conv2_2" 177 | top: "pool2" 178 | pooling_param { 179 | pool: MAX 180 | kernel_size: 2 181 | stride: 2 182 | } 183 | } 184 | 185 | layer { 186 | name: "conv3_1" 187 | type: "Convolution" 188 | bottom: "pool2" 189 | top: "conv3_1" 190 | convolution_param { 191 | bias_term: true 192 | num_output: 256 193 | pad: 1 194 | kernel_size: 3 195 | stride: 1 196 | } 197 | } 198 | layer { 199 | bottom: "conv3_1" 200 | top: "conv3_1" 201 | name: "conv3_1_bn" 202 | type: "BatchNorm" 203 | batch_norm_param { 204 | use_global_stats: true 205 | } 206 | } 207 | layer { 208 | bottom: "conv3_1" 209 | top: "conv3_1" 210 | name: "conv3_1_scale" 211 | type: "Scale" 212 | scale_param { 213 | bias_term: true 214 | } 215 | } 216 | layer { 217 | name: "relu3_1" 218 | type: "ReLU" 219 | bottom: "conv3_1" 220 | top: "conv3_1" 221 | } 222 | 223 | layer { 224 | name: "conv3_2" 225 | type: "Convolution" 226 | bottom: "conv3_1" 227 | top: "conv3_2" 228 | convolution_param { 229 | bias_term: true 230 | num_output: 256 231 | pad: 1 232 | kernel_size: 3 233 | stride: 1 234 | } 235 | } 236 | layer { 237 | bottom: "conv3_2" 238 | top: "conv3_2" 239 | name: "conv3_2_bn" 240 | type: "BatchNorm" 241 | batch_norm_param { 242 | use_global_stats: true 243 | } 244 | } 245 | layer { 246 | bottom: "conv3_2" 247 | top: "conv3_2" 248 | name: "conv3_2_scale" 249 | type: "Scale" 250 | scale_param { 251 | bias_term: true 252 | } 253 | } 254 | layer { 255 | name: "relu3_2" 256 | type: "ReLU" 257 | bottom: "conv3_2" 258 | top: "conv3_2" 259 | } 260 | 261 | layer { 262 | name: "pool3" 263 | type: "Pooling" 264 | bottom: "conv3_2" 265 | top: "pool3" 
266 | pooling_param { 267 | pool: MAX 268 | kernel_size: 2 269 | stride: 2 270 | } 271 | } 272 | 273 | layer { 274 | name: "conv4_1" 275 | type: "Convolution" 276 | bottom: "pool3" 277 | top: "conv4_1" 278 | convolution_param { 279 | bias_term: true 280 | num_output: 512 281 | pad: 1 282 | kernel_size: 3 283 | stride: 1 284 | } 285 | } 286 | layer { 287 | bottom: "conv4_1" 288 | top: "conv4_1" 289 | name: "conv4_1_bn" 290 | type: "BatchNorm" 291 | batch_norm_param { 292 | use_global_stats: true 293 | } 294 | } 295 | layer { 296 | bottom: "conv4_1" 297 | top: "conv4_1" 298 | name: "conv4_1_scale" 299 | type: "Scale" 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "relu4_1" 306 | type: "ReLU" 307 | bottom: "conv4_1" 308 | top: "conv4_1" 309 | } 310 | 311 | layer { 312 | name: "conv4_2" 313 | type: "Convolution" 314 | bottom: "conv4_1" 315 | top: "conv4_2" 316 | convolution_param { 317 | bias_term: true 318 | num_output: 512 319 | pad: 1 320 | kernel_size: 3 321 | stride: 1 322 | } 323 | } 324 | layer { 325 | bottom: "conv4_2" 326 | top: "conv4_2" 327 | name: "conv4_2_bn" 328 | type: "BatchNorm" 329 | batch_norm_param { 330 | use_global_stats: true 331 | } 332 | } 333 | layer { 334 | bottom: "conv4_2" 335 | top: "conv4_2" 336 | name: "conv4_2_scale" 337 | type: "Scale" 338 | scale_param { 339 | bias_term: true 340 | } 341 | } 342 | layer { 343 | name: "relu4_2" 344 | type: "ReLU" 345 | bottom: "conv4_2" 346 | top: "conv4_2" 347 | } 348 | 349 | layer { 350 | name: "pool4" 351 | type: "Pooling" 352 | bottom: "conv4_2" 353 | top: "pool4" 354 | pooling_param { 355 | pool: MAX 356 | kernel_size: 2 357 | stride: 2 358 | } 359 | } 360 | 361 | layer { 362 | name: "conv5_1" 363 | type: "Convolution" 364 | bottom: "pool4" 365 | top: "conv5_1" 366 | convolution_param { 367 | bias_term: true 368 | num_output: 512 369 | pad: 1 370 | kernel_size: 3 371 | stride: 1 372 | } 373 | } 374 | layer { 375 | bottom: "conv5_1" 376 | top: "conv5_1" 377 | name: "conv5_1_bn" 378 | type: "BatchNorm" 379 | batch_norm_param { 380 | use_global_stats: true 381 | } 382 | } 383 | layer { 384 | bottom: "conv5_1" 385 | top: "conv5_1" 386 | name: "conv5_1_scale" 387 | type: "Scale" 388 | scale_param { 389 | bias_term: true 390 | } 391 | } 392 | layer { 393 | name: "relu5_1" 394 | type: "ReLU" 395 | bottom: "conv5_1" 396 | top: "conv5_1" 397 | } 398 | 399 | layer { 400 | name: "conv5_2" 401 | type: "Convolution" 402 | bottom: "conv5_1" 403 | top: "conv5_2" 404 | convolution_param { 405 | bias_term: true 406 | num_output: 512 407 | pad: 1 408 | kernel_size: 3 409 | stride: 1 410 | } 411 | } 412 | layer { 413 | bottom: "conv5_2" 414 | top: "conv5_2" 415 | name: "conv5_2_bn" 416 | type: "BatchNorm" 417 | batch_norm_param { 418 | use_global_stats: true 419 | } 420 | } 421 | layer { 422 | bottom: "conv5_2" 423 | top: "conv5_2" 424 | name: "conv5_2_scale" 425 | type: "Scale" 426 | scale_param { 427 | bias_term: true 428 | } 429 | } 430 | layer { 431 | name: "relu5_2" 432 | type: "ReLU" 433 | bottom: "conv5_2" 434 | top: "conv5_2" 435 | } 436 | 437 | layer { 438 | name: "pool5" 439 | type: "Pooling" 440 | bottom: "conv5_2" 441 | top: "pool5" 442 | pooling_param { 443 | pool: MAX 444 | kernel_size: 2 445 | stride: 2 446 | } 447 | } 448 | 449 | layer { 450 | bottom: "pool5" 451 | top: "fc6" 452 | name: "fc6" 453 | type: "InnerProduct" 454 | inner_product_param { 455 | num_output: 4096 456 | } 457 | } 458 | 459 | layer { 460 | name: "relu6" 461 | type: "ReLU" 462 | bottom: "fc6" 463 | top: "fc6" 464 | } 
465 | 466 | layer { 467 | name: "dropout6" 468 | type: "Dropout" 469 | bottom: "fc6" 470 | top: "fc6" 471 | dropout_param { 472 | dropout_ratio: 0.5 473 | } 474 | } 475 | 476 | layer { 477 | bottom: "fc6" 478 | top: "fc7" 479 | name: "fc7" 480 | type: "InnerProduct" 481 | inner_product_param { 482 | num_output: 4096 483 | } 484 | } 485 | 486 | layer { 487 | name: "relu7" 488 | type: "ReLU" 489 | bottom: "fc7" 490 | top: "fc7" 491 | } 492 | 493 | layer { 494 | name: "dropout7" 495 | type: "Dropout" 496 | bottom: "fc7" 497 | top: "fc7" 498 | dropout_param { 499 | dropout_ratio: 0.5 500 | } 501 | } 502 | 503 | layer { 504 | bottom: "fc7" 505 | top: "classifier" 506 | name: "classifier" 507 | type: "InnerProduct" 508 | inner_product_param { 509 | num_output: 1000 510 | } 511 | } 512 | 513 | layer { 514 | name: "prob" 515 | type: "Softmax" 516 | bottom: "classifier" 517 | top: "prob" 518 | } 519 | -------------------------------------------------------------------------------- /cls/README.md: -------------------------------------------------------------------------------- 1 | ## CLS (Classification) 2 | 3 | Please install [py-RFCN-priv](https://github.com/soeaver/py-RFCN-priv) for evaluating and finetuning. 4 | 5 | ### Disclaimer 6 | 7 | Most of the models are converted from other projects; the main contribution belongs to the original authors. 8 | 9 | Project links: 10 | 11 | [mxnet-model-gallery](https://github.com/dmlc/mxnet-model-gallery), [tensorflow slim](https://github.com/tensorflow/models/tree/master/slim), [craftGBD](https://github.com/craftGBD/craftGBD), [ResNeXt](https://github.com/facebookresearch/ResNeXt), [DenseNet](https://github.com/liuzhuang13/DenseNet), [wide-residual-networks](https://github.com/szagoruyko/wide-residual-networks), [keras deep-learning-models](https://github.com/fchollet/deep-learning-models), [ademxapp](https://github.com/itijyou/ademxapp), [DPNs](https://github.com/cypw/DPNs), [SENet](https://github.com/hujie-frank/SENet) 12 | 13 | 14 | ### Performance on ImageNet validation 15 | **1. Top-1/5 error of pre-trained models in this repository.** 16 | 17 | Network|224/299<br/>(single-crop)|224/299<br/>(12-crop)|320/395<br/>(single-crop)|320/395<br/>(12-crop)
18 | :---:|:---:|:---:|:---:|:---: 19 | resnet18-priv| 29.62/10.38 | 26.69/8.64 | 27.54/8.98 | 26.23/8.21 20 | resnext26-32x4d-priv| 24.93/7.75 | 23.54/6.89 | 24.20/7.21 | 23.19/6.60 21 | resnet101-v2| 21.95/6.12 | 19.99/5.04 | 20.37/5.16 | 19.29/4.57 22 | resnet152-v2| 20.85/5.42 | 19.24/4.68 | 19.66/4.73 | 18.84/4.32 23 | resnet269-v2| 19.71/5.00 | 18.25/4.20 | 18.70/4.33 | 17.87/3.85 24 | resnet38a| 20.66/5.27 | ../.. | 19.25/4.66 | ../.. 25 | inception-v3| 21.67/5.75 | 19.60/4.73 | 20.10/4.82 | 19.25/4.24 26 | xception| 20.90/5.49 | 19.68/4.90 | 19.58/4.77 | 18.91/4.39 27 | inception-v4| 20.03/5.09 | 18.60/4.30 | 18.68/4.32 | 18.12/3.92 28 | inception-resnet-v2| 19.86/4.83 | 18.46/4.08 | 18.75/4.02 | 18.15/3.71 29 | resnext50-32x4d| 22.37/6.31 | 20.53/5.35 | 21.10/5.53 | 20.37/5.03 30 | resnext101-32x4d| 21.30/5.79 | 19.47/4.89 | 19.91/4.97 | 19.19/4.59 31 | resnext101-64x4d| 20.60/5.41 | 18.88/4.59 | 19.26/4.63 | 18.48/4.31 32 | wrn50-2<br/>(resnet50-1x128d)| 22.13/6.13 | 20.09/5.06 | 20.68/5.28 | 19.83/4.87 33 | airx50-24x4d| 22.39/6.23 | 20.36/5.19 | 20.88/5.33 | 19.97/4.92 34 | air101| 21.32/5.76 | 19.36/4.84 | 19.92/4.75 | 19.05/4.43 35 | air152| 20.38/5.11 | 18.46/4.26 | 19.08/4.40 | 18.53/4.00 36 | airx101-32x4d| 21.15/5.74 | 19.43/4.86 | 19.61/4.93 | 18.90/4.49 37 | dpn-68-extra| 22.56/6.24 | 20.48/4.99 | 20.99/5.25 | 20.09/4.73 38 | dpn-92| 20.81/5.47 | 18.99/4.59 | 19.23/4.64 | 18.68/4.24 39 | dpn-98| 20.27/5.28 | 18.57/4.42 | 18.87/4.43 | 18.21/4.11 40 | dpn-131| 20.00/5.24 | 18.52/4.28 | 18.63/4.31 | 17.99/3.92 41 | dpn-107-extra| 19.70/5.06 | ../.. | 18.41/4.25 | ../.. 42 | se-inception-v2<br/>(se-inception-bn)| 23.64/7.04 | 21.57/5.86 | 21.61/5.87 | 20.85/5.38 43 | se-resnet50| 22.39/6.37 | 20.61/5.34 | 20.49/5.22 | 20.02/4.85 44 | se-resnet50-hik| 21.98/5.80 | 20.06/4.88 | 20.51/5.04 | 19.92/4.68 45 | se-resnet101| 21.76/5.72 | 19.96/4.79 | 19.97/4.78 | 19.34/4.41 46 | se-resnet152| 21.34/5.54 | 19.56/4.66 | 19.34/4.59 | 18.83/4.32 47 | se-resnext50-32x4d| 20.96/5.53 | 19.39/4.69 | 19.36/4.66 | 18.70/4.38 48 | se-resnext101-32x4d| 19.83/4.95 | 18.44/4.16 | 18.14/4.08 | 17.68/3.86 49 | senet<br/>(se-resnext152-64x4d)| 18.67/4.47 | 17.40/3.69 | 17.28/3.78 | 16.80/3.47 50 | 51 | - The resnet18-priv and resnext26-32x4d-priv models were trained under [pytorch](https://github.com/soeaver/pytorch-classification) by bupt-priv. 52 | - The pre-trained models are tested on the original [caffe](https://github.com/BVLC/caffe) with [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py), **but ceil_mode: false (pooling_layer) is used for the models converted from torch; see the details in https://github.com/BVLC/caffe/pull/3057/files and the sketch after this list**. If you remove ceil_mode: false, top-1 accuracy declines by about 1%. 53 | - 224x224 (base_size=256) and 320x320 (base_size=320) crop sizes are used for resnet-v2/resnext/wrn, and 299x299 (base_size=320) and 395x395 (base_size=395) crop sizes for inception.
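To make the ceil_mode note concrete, here is a minimal sketch of ours (the helper name `pool_output_size` is illustrative, not from this repository) showing how the rounding mode changes a pooling layer's output size:

```python
import math

def pool_output_size(in_size, kernel, stride, pad=0, ceil_mode=True):
    """Spatial output size of a Caffe pooling layer.

    Caffe rounds up (ceil) by default; the ceil_mode: false option from
    https://github.com/BVLC/caffe/pull/3057/files rounds down instead,
    matching the floor behavior of torch pooling.
    """
    rounding = math.ceil if ceil_mode else math.floor
    return int(rounding((in_size + 2.0 * pad - kernel) / stride)) + 1

# A 3x3/stride-2 max pooling on a 112x112 feature map:
assert pool_output_size(112, 3, 2, ceil_mode=True) == 56   # Caffe default
assert pool_output_size(112, 3, 2, ceil_mode=False) == 55  # torch-style floor
```

This one-pixel difference propagates through the later stages, which is why evaluating torch-converted models without ceil_mode: false costs roughly 1% top-1.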
54 | 55 | **2. Top-1/5 accuracy with different crop sizes.** 56 | ![teaser](https://github.com/soeaver/caffe-model/blob/master/cls/accuracy.png) 57 | - Figure: Accuracy curves of inception_v3 (left) and resnet101_v2 (right) with different crop sizes. 58 | 59 | **3. Download URL and forward/backward time cost for each model.** 60 | 61 | Forward/backward time cost is evaluated with one image per mini-batch using cuDNN 5.1 on a Pascal Titan X GPU. 62 | 63 | We use 64 | ``` 65 | ~/caffe/build/tools/caffe time -model deploy.prototxt -gpu 0 -iterations 1000 66 | ``` 67 | to measure the forward/backward time cost; note that this differs considerably from the time cost reported by [evaluation_cls.py](https://github.com/soeaver/caffe-model/blob/master/cls/evaluation_cls.py). 68 | 69 | Network|F/B(224/299)|F/B(320/395)|Download<br/>(BaiduCloud)|Download<br/>(GoogleDrive)|Source
70 | :---:|:---:|:---:|:---:|:---:|:---: 71 | resnet18-priv | 4.48/5.07ms | 4.99/7.01ms | [44.6MB](https://pan.baidu.com/s/1hrYc3La)|44.6MB|[pytorch-cls](https://github.com/soeaver/pytorch-classification) 72 | resnext26-32x4d-priv | 8.53/10.12ms | 10.55/13.46ms | [58.9MB](https://pan.baidu.com/s/1dFzmUOh)|[58.9MB](https://drive.google.com/open?id=0B9mkjlmP0d7zZEh4dzZ3TVZUb2M)|[pytorch-cls](https://github.com/soeaver/pytorch-classification) 73 | resnet101-v2| 22.31/22.75ms | 26.02/29.50ms | [170.3MB](https://pan.baidu.com/s/1kVQDHFx)|[170.3MB](https://drive.google.com/open?id=0B9mkjlmP0d7zRlhISks0VktGOGs)|[craftGBD](https://github.com/craftGBD/craftGBD) 74 | resnet152-v2| 32.11/32.54ms | 37.46/41.84ms | [230.2MB](https://pan.baidu.com/s/1dFIc4vB)|[230.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOXhrb1EyYVRHOEk)|[craftGBD](https://github.com/craftGBD/craftGBD) 75 | resnet269-v2| 58.20/59.15ms | 69.43/77.26ms | [390.4MB](https://pan.baidu.com/s/1qYbICs0)|[390.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zOGFxcTMySHN6bUE)|[craftGBD](https://github.com/craftGBD/craftGBD) 76 | inception-v3| 21.79/19.82ms | 22.14/24.88ms | [91.1MB](https://pan.baidu.com/s/1boC0HEf)|[91.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTEJmNEh6c0RfYzg)|[mxnet](https://github.com/dmlc/mxnet-model-gallery/blob/master/imagenet-1k-inception-v3.md) 77 | xception | 14.03/30.39ms | 19.46/48.64ms | [87.4MB](https://pan.baidu.com/s/1gfiTShd)|87.4MB|[keras-models](https://github.com/fchollet/deep-learning-models) 78 | inception-v4| 32.96/32.19ms | 36.04/41.91ms | [163.1MB](https://pan.baidu.com/s/1c6D150)|[163.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zUEJ3aEJ2b3J0RFU)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim) 79 | inception-resnet-v2| 49.06/54.83ms | 54.06/66.38ms | [213.4MB](https://pan.baidu.com/s/1jHPJCX4)|[213.4MB](https://drive.google.com/open?id=0B9mkjlmP0d7zc3A4NWlQQzdoM28)|[tf-slim](https://github.com/tensorflow/models/tree/master/slim) 80 | resnext50-32x4d| 17.29/20.08ms | 19.02/23.81ms | [95.8MB](https://pan.baidu.com/s/1kVqgfJL)|[95.8MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYVgwanhVWnhrYlE)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 81 | resnext101-32x4d| 30.73/35.75ms | 34.33/41.02ms | [169.1MB](https://pan.baidu.com/s/1hswrNUG)|[169.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zTzYyelgyYlpOU3c)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 82 | resnext101-64x4d| 42.07/64.58ms | 51.99/77.71ms | [319.2MB](https://pan.baidu.com/s/1pLhk0Zp)|[319.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zQ0ZZOENnSFdQWnc)|[facebookresearch](https://github.com/facebookresearch/ResNeXt) 83 | wrn50-2<br/>(resnet50_1x128d)| 16.48/25.28ms | 20.99/35.04ms | [263.1MB](https://pan.baidu.com/s/1nvhoCsh)|[263.1MB](https://drive.google.com/open?id=0B9mkjlmP0d7zYW40dUMxS3VPclU)|[szagoruyko](https://github.com/szagoruyko/wide-residual-networks) 84 | airx50-24x4d| 23.59/24.80ms | 26.64/30.92ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 85 | air101| 35.78/35.94ms | 39.69/45.52ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 86 | airx101-32x4d| 49.43/55.52ms | 54.64/66.31ms | .. | .. |[pytorch-cls](https://github.com/soeaver/pytorch-classification) 87 | dpn-68| ../..ms | ../..ms | [48.4MB](https://pan.baidu.com/s/1bphINV5) | .. |[DPNs](https://github.com/cypw/DPNs) 88 | dpn-92| 29.71/30.68ms | 35.19/37.13ms | [144.2MB](https://pan.baidu.com/s/1pL0VuWV)|[144.2MB](https://drive.google.com/open?id=0B9mkjlmP0d7zaWVKWFd2OXpRTVU)|[DPNs](https://github.com/cypw/DPNs) 89 | dpn-98| 36.24/44.06ms | 42.84/53.50ms | [235.6MB](https://pan.baidu.com/s/1pKHBRlD) | .. |[DPNs](https://github.com/cypw/DPNs) 90 | dpn-107| 45.21/59.77ms | 56.12/77.78ms | [332.4MB](https://pan.baidu.com/s/1i5b0Uih) | .. |[DPNs](https://github.com/cypw/DPNs) 91 | dpn-131| 48.20/59.43ms | 57.66/72.43ms | [303.3MB](https://pan.baidu.com/s/1miOdMHi) | .. |[DPNs](https://github.com/cypw/DPNs) 92 | se-inception-v2| 14.66/10.63ms | 15.71/13.52ms | .. | .. |[senet](https://github.com/hujie-frank/SENet) 93 | se-resnet50| 15.29/14.20ms | 17.96/19.69ms | .. | .. |[senet](https://github.com/hujie-frank/SENet) 94 | 95 | - To speed up xception, we adopt the [convolution depthwise layer](https://github.com/BVLC/caffe/pull/5665/files). 96 | 97 | ### Check the performance 98 | **1. Download the ILSVRC 2012 classification val set [6.3GB](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar), and put the extracted images into the directory:** 99 | 100 | ~/Database/ILSVRC2012 101 | 102 | **2. Modify the parameter settings** 103 | 104 | Network|val_file|mean_value|std 105 | :---:|:---:|:---:|:---: 106 | resnet-v2(101/152/269)| ILSVRC2012_val | [102.98, 115.947, 122.772] | [1.0, 1.0, 1.0] 107 | resnet10/18, resnext, air(x) | ILSVRC2012_val | [103.52, 116.28, 123.675] | [57.375, 57.12, 58.395] 108 | inception-v3| **ILSVRC2015_val** | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0] 109 | inception-v2, xception<br/>inception-v4, inception-resnet-v2 | ILSVRC2012_val | [128.0, 128.0, 128.0] | [128.0, 128.0, 128.0] 110 | dpn(68/92/98/131/107)| ILSVRC2012_val | [104.0, 117.0, 124.0] | [59.88, 59.88, 59.88] 111 | official senet| **ILSVRC2015_val** | [104.0, 117.0, 123.0] | [1.0, 1.0, 1.0] 112 | 113 | 114 | **3. Then run evaluation_cls.py** 115 | 116 | python evaluation_cls.py
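For reference, the settings above amount to the following input preprocessing (a minimal sketch of ours, not the repository's code; it assumes OpenCV-style BGR loading as in evaluation_cls.py, and `preprocess` is an illustrative name):

```python
import cv2
import numpy as np

# Values from the table in step 2, e.g. the resnet-v2(101/152/269) row.
MEAN = np.array([102.98, 115.947, 122.772], dtype=np.float32)  # B, G, R
STD = np.array([1.0, 1.0, 1.0], dtype=np.float32)

def preprocess(path, crop_size=224, base_size=256):
    """Resize the short side to base_size, center-crop, and normalize per channel."""
    img = cv2.imread(path).astype(np.float32)              # BGR, HxWx3
    h, w = img.shape[:2]
    ratio = float(base_size) / min(h, w)
    img = cv2.resize(img, (int(round(w * ratio)), int(round(h * ratio))))
    off_h = (img.shape[0] - crop_size) // 2
    off_w = (img.shape[1] - crop_size) // 2
    img = img[off_h:off_h + crop_size, off_w:off_w + crop_size]
    img = (img - MEAN) / STD                               # (x - mean) / std
    return img.transpose((2, 0, 1))[np.newaxis]            # 1x3xHxW "data" blob
```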
-------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet101-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet101-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res31_scale" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | 16 | #============== RCNN =============== 17 | layer { 18 | name: "roi_pool" 19 | type: "ROIPooling" 20 | bottom: "res31_scale" 21 | bottom: "rois" 22 | top: "roi_pool" 23 | roi_pooling_param { 24 | pooled_w: 14 25 | pooled_h: 14 26 | spatial_scale: 0.062500 27 | } 28 | } 29 | 30 | layer { 31 | name: "res31_conv1" 32 | type: "Convolution" 33 | bottom: "roi_pool" 34 | top: "res31_conv1" 35 | param { 36 | lr_mult: 1 37 | decay_mult: 1 38 | } 39 | convolution_param { 40 | bias_term: false 41 | num_output: 512 42 | pad: 0 43 | kernel_size: 1 44 | stride: 1 45 | } 46 | } 47 | 48 | layer { 49 | name: "res31_conv1_scale" 50 | type: "Scale" 51 | bottom: "res31_conv1" 52 | top: "res31_conv1" 53 | scale_param { 54 | bias_term: true 55 | } 56 | param { 57 | lr_mult: 0.0 58 | decay_mult: 0.0 59 | } 60 | param { 61 | lr_mult: 0.0 62 | decay_mult: 0.0 63 | } 64 | } 65 | layer { 66 | name: "res31_conv1_relu" 67 | type: "ReLU" 68 | bottom: "res31_conv1" 69 | top: "res31_conv1" 70 | } 71 | layer { 72 | name: "res31_conv2" 73 | type: "Convolution" 74 | bottom: "res31_conv1" 75 | top: "res31_conv2" 76 | param { 77 | lr_mult: 1 78 | decay_mult: 1 79 | } 80 | convolution_param { 81 | bias_term: false 82 | num_output: 512 83 | pad: 1 84 | kernel_size: 3 85 | stride: 2 86 | } 87 | } 88 | 89 | layer { 90 | name: "res31_conv2_scale" 91 | type: "Scale" 92 | bottom: "res31_conv2" 93 | top: "res31_conv2" 94 | scale_param { 95 | bias_term: true 96 | } 97 | param { 98 | lr_mult: 0.0 99 | decay_mult: 0.0 100 | } 101 | param { 102 | lr_mult: 0.0 103 | decay_mult: 0.0 104 | } 105 | } 106 | layer { 107 | name: "res31_conv2_relu" 108 | type: "ReLU" 109 | bottom: "res31_conv2" 110 | top: "res31_conv2" 111 | } 112 | layer { 113 | name: "res31_conv3" 114 | type: "Convolution" 115 | bottom: "res31_conv2" 116 | top: "res31_conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | convolution_param { 122 | bias_term: false 123 | num_output: 2048 124 | pad: 0 125 | kernel_size: 1 126 | stride: 1 127 | } 128 | } 129 | layer { 130 | name: "res31_match_conv" 131 | type: "Convolution" 132 | bottom: "roi_pool" 133 | top: "res31_match_conv" 134 | param { 135 | lr_mult: 1 136 | decay_mult: 1 137 | } 138 | convolution_param { 139 | bias_term: false 140 | num_output: 2048 141 | pad: 0 142 | kernel_size: 1 143 | stride: 2 144 | } 145 | } 146 | layer { 147 | name: "res31_eletwise" 148 | type: "Eltwise" 149 | bottom: "res31_match_conv" 150 | bottom: "res31_conv3" 151 | top: "res31_eletwise" 152 | eltwise_param { 153 | operation: SUM 154 | } 155 | } 156 | 157 | layer { 158 | name: "res32_scale" 159 | type: "Scale" 160 | bottom: "res31_eletwise" 161 | top: "res32_scale" 162 | scale_param { 163 | bias_term: true 164 |
} 165 | param { 166 | lr_mult: 0.0 167 | decay_mult: 0.0 168 | } 169 | param { 170 | lr_mult: 0.0 171 | decay_mult: 0.0 172 | } 173 | } 174 | layer { 175 | name: "res32_relu" 176 | type: "ReLU" 177 | bottom: "res32_scale" 178 | top: "res32_scale" 179 | } 180 | layer { 181 | name: "res32_conv1" 182 | type: "Convolution" 183 | bottom: "res32_scale" 184 | top: "res32_conv1" 185 | param { 186 | lr_mult: 1 187 | decay_mult: 1 188 | } 189 | convolution_param { 190 | bias_term: false 191 | num_output: 512 192 | pad: 0 193 | kernel_size: 1 194 | stride: 1 195 | } 196 | } 197 | 198 | layer { 199 | name: "res32_conv1_scale" 200 | type: "Scale" 201 | bottom: "res32_conv1" 202 | top: "res32_conv1" 203 | scale_param { 204 | bias_term: true 205 | } 206 | param { 207 | lr_mult: 0.0 208 | decay_mult: 0.0 209 | } 210 | param { 211 | lr_mult: 0.0 212 | decay_mult: 0.0 213 | } 214 | } 215 | layer { 216 | name: "res32_conv1_relu" 217 | type: "ReLU" 218 | bottom: "res32_conv1" 219 | top: "res32_conv1" 220 | } 221 | layer { 222 | name: "res32_conv2" 223 | type: "Convolution" 224 | bottom: "res32_conv1" 225 | top: "res32_conv2" 226 | param { 227 | lr_mult: 1 228 | decay_mult: 1 229 | } 230 | convolution_param { 231 | bias_term: false 232 | num_output: 512 233 | pad: 1 234 | kernel_size: 3 235 | stride: 1 236 | } 237 | } 238 | 239 | layer { 240 | name: "res32_conv2_scale" 241 | type: "Scale" 242 | bottom: "res32_conv2" 243 | top: "res32_conv2" 244 | scale_param { 245 | bias_term: true 246 | } 247 | param { 248 | lr_mult: 0.0 249 | decay_mult: 0.0 250 | } 251 | param { 252 | lr_mult: 0.0 253 | decay_mult: 0.0 254 | } 255 | } 256 | layer { 257 | name: "res32_conv2_relu" 258 | type: "ReLU" 259 | bottom: "res32_conv2" 260 | top: "res32_conv2" 261 | } 262 | layer { 263 | name: "res32_conv3" 264 | type: "Convolution" 265 | bottom: "res32_conv2" 266 | top: "res32_conv3" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | convolution_param { 272 | bias_term: false 273 | num_output: 2048 274 | pad: 0 275 | kernel_size: 1 276 | stride: 1 277 | } 278 | } 279 | layer { 280 | name: "res32_eletwise" 281 | type: "Eltwise" 282 | bottom: "res31_eletwise" 283 | bottom: "res32_conv3" 284 | top: "res32_eletwise" 285 | eltwise_param { 286 | operation: SUM 287 | } 288 | } 289 | 290 | layer { 291 | name: "res33_scale" 292 | type: "Scale" 293 | bottom: "res32_eletwise" 294 | top: "res33_scale" 295 | scale_param { 296 | bias_term: true 297 | } 298 | param { 299 | lr_mult: 0.0 300 | decay_mult: 0.0 301 | } 302 | param { 303 | lr_mult: 0.0 304 | decay_mult: 0.0 305 | } 306 | } 307 | layer { 308 | name: "res33_relu" 309 | type: "ReLU" 310 | bottom: "res33_scale" 311 | top: "res33_scale" 312 | } 313 | layer { 314 | name: "res33_conv1" 315 | type: "Convolution" 316 | bottom: "res33_scale" 317 | top: "res33_conv1" 318 | param { 319 | lr_mult: 1 320 | decay_mult: 1 321 | } 322 | convolution_param { 323 | bias_term: false 324 | num_output: 512 325 | pad: 0 326 | kernel_size: 1 327 | stride: 1 328 | } 329 | } 330 | 331 | layer { 332 | name: "res33_conv1_scale" 333 | type: "Scale" 334 | bottom: "res33_conv1" 335 | top: "res33_conv1" 336 | scale_param { 337 | bias_term: true 338 | } 339 | param { 340 | lr_mult: 0.0 341 | decay_mult: 0.0 342 | } 343 | param { 344 | lr_mult: 0.0 345 | decay_mult: 0.0 346 | } 347 | } 348 | layer { 349 | name: "res33_conv1_relu" 350 | type: "ReLU" 351 | bottom: "res33_conv1" 352 | top: "res33_conv1" 353 | } 354 | layer { 355 | name: "res33_conv2" 356 | type: "Convolution" 357 | bottom: "res33_conv1" 358 | 
top: "res33_conv2" 359 | param { 360 | lr_mult: 1 361 | decay_mult: 1 362 | } 363 | convolution_param { 364 | bias_term: false 365 | num_output: 512 366 | pad: 1 367 | kernel_size: 3 368 | stride: 1 369 | } 370 | } 371 | 372 | layer { 373 | name: "res33_conv2_scale" 374 | type: "Scale" 375 | bottom: "res33_conv2" 376 | top: "res33_conv2" 377 | scale_param { 378 | bias_term: true 379 | } 380 | param { 381 | lr_mult: 0.0 382 | decay_mult: 0.0 383 | } 384 | param { 385 | lr_mult: 0.0 386 | decay_mult: 0.0 387 | } 388 | } 389 | layer { 390 | name: "res33_conv2_relu" 391 | type: "ReLU" 392 | bottom: "res33_conv2" 393 | top: "res33_conv2" 394 | } 395 | layer { 396 | name: "res33_conv3" 397 | type: "Convolution" 398 | bottom: "res33_conv2" 399 | top: "res33_conv3" 400 | param { 401 | lr_mult: 1 402 | decay_mult: 1 403 | } 404 | convolution_param { 405 | bias_term: false 406 | num_output: 2048 407 | pad: 0 408 | kernel_size: 1 409 | stride: 1 410 | } 411 | } 412 | layer { 413 | name: "res33_eletwise" 414 | type: "Eltwise" 415 | bottom: "res32_eletwise" 416 | bottom: "res33_conv3" 417 | top: "res33_eletwise" 418 | eltwise_param { 419 | operation: SUM 420 | } 421 | } 422 | 423 | layer { 424 | name: "res33_eletwise_scale" 425 | type: "Scale" 426 | bottom: "res33_eletwise" 427 | top: "res33_eletwise_scale" 428 | scale_param { 429 | bias_term: true 430 | } 431 | param { 432 | lr_mult: 0.0 433 | decay_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | decay_mult: 0.0 438 | } 439 | } 440 | layer { 441 | name: "res33_eletwise_relu" 442 | type: "ReLU" 443 | bottom: "res33_eletwise_scale" 444 | top: "res33_eletwise_scale" 445 | } 446 | 447 | layer { 448 | bottom: "res33_eletwise_scale" 449 | top: "pool5" 450 | name: "pool5" 451 | type: "Pooling" 452 | pooling_param { 453 | pool: AVE 454 | global_pooling: true 455 | } 456 | } 457 | layer { 458 | name: "cls_score" 459 | type: "InnerProduct" 460 | bottom: "pool5" 461 | top: "cls_score" 462 | param { 463 | lr_mult: 1 464 | decay_mult: 1 465 | } 466 | param { 467 | lr_mult: 2 468 | decay_mult: 0 469 | } 470 | inner_product_param { 471 | num_output: 21 472 | weight_filler { 473 | type: "msra" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | value: 0 479 | } 480 | } 481 | } 482 | layer { 483 | name: "bbox_pred" 484 | type: "InnerProduct" 485 | bottom: "pool5" 486 | top: "bbox_pred" 487 | param { 488 | lr_mult: 1 489 | decay_mult: 1 490 | } 491 | param { 492 | lr_mult: 2 493 | decay_mult: 0 494 | } 495 | inner_product_param { 496 | num_output: 84 497 | weight_filler { 498 | type: "msra" 499 | std: 0.01 500 | } 501 | bias_filler { 502 | type: "constant" 503 | value: 0 504 | } 505 | } 506 | } 507 | layer { 508 | name: "cls_prob" 509 | type: "Softmax" 510 | bottom: "cls_score" 511 | top: "cls_prob" 512 | } 513 | -------------------------------------------------------------------------------- /det/faster_rcnn/models/pascal_voc/resnet152-v2/rpn_rcnn_deploys/rcnn_deploy_faster_voc_resnet152-v2-merge.prototxt: -------------------------------------------------------------------------------- 1 | input: "res48_scale" 2 | input_shape { 3 | dim: 1 4 | dim: 1024 5 | dim: 40 6 | dim: 40 7 | } 8 | 9 | input: "rois" 10 | input_shape { 11 | dim: 300 12 | dim: 5 13 | } 14 | 15 | #============== RCNN =============== 16 | layer { 17 | name: "roi_pool" 18 | type: "ROIPooling" 19 | bottom: "res48_scale" 20 | bottom: "rois" 21 | top: "roi_pool" 22 | roi_pooling_param { 23 | pooled_w: 14 24 | pooled_h: 14 25 | spatial_scale: 0.062500 26 | } 27 | } 28 | layer 
{ 29 | name: "res48_conv1" 30 | type: "Convolution" 31 | bottom: "roi_pool" 32 | top: "res48_conv1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | convolution_param { 38 | bias_term: false 39 | num_output: 512 40 | pad: 0 41 | kernel_size: 1 42 | stride: 1 43 | } 44 | } 45 | 46 | layer { 47 | name: "res48_conv1_scale" 48 | type: "Scale" 49 | bottom: "res48_conv1" 50 | top: "res48_conv1" 51 | scale_param { 52 | bias_term: true 53 | } 54 | param { 55 | lr_mult: 0.0 56 | decay_mult: 0.0 57 | } 58 | param { 59 | lr_mult: 0.0 60 | decay_mult: 0.0 61 | } 62 | } 63 | layer { 64 | name: "res48_conv1_relu" 65 | type: "ReLU" 66 | bottom: "res48_conv1" 67 | top: "res48_conv1" 68 | } 69 | layer { 70 | name: "res48_conv2" 71 | type: "Convolution" 72 | bottom: "res48_conv1" 73 | top: "res48_conv2" 74 | param { 75 | lr_mult: 1 76 | decay_mult: 1 77 | } 78 | convolution_param { 79 | bias_term: false 80 | num_output: 512 81 | pad: 1 82 | kernel_size: 3 83 | stride: 2 84 | } 85 | } 86 | 87 | layer { 88 | name: "res48_conv2_scale" 89 | type: "Scale" 90 | bottom: "res48_conv2" 91 | top: "res48_conv2" 92 | scale_param { 93 | bias_term: true 94 | } 95 | param { 96 | lr_mult: 0.0 97 | decay_mult: 0.0 98 | } 99 | param { 100 | lr_mult: 0.0 101 | decay_mult: 0.0 102 | } 103 | } 104 | layer { 105 | name: "res48_conv2_relu" 106 | type: "ReLU" 107 | bottom: "res48_conv2" 108 | top: "res48_conv2" 109 | } 110 | layer { 111 | name: "res48_conv3" 112 | type: "Convolution" 113 | bottom: "res48_conv2" 114 | top: "res48_conv3" 115 | param { 116 | lr_mult: 1 117 | decay_mult: 1 118 | } 119 | convolution_param { 120 | bias_term: false 121 | num_output: 2048 122 | pad: 0 123 | kernel_size: 1 124 | stride: 1 125 | } 126 | } 127 | layer { 128 | name: "res48_match_conv" 129 | type: "Convolution" 130 | bottom: "roi_pool" 131 | top: "res48_match_conv" 132 | param { 133 | lr_mult: 1 134 | decay_mult: 1 135 | } 136 | convolution_param { 137 | bias_term: false 138 | num_output: 2048 139 | pad: 0 140 | kernel_size: 1 141 | stride: 2 142 | bias_filler { 143 | type: "constant" 144 | value: 0.2 145 | } 146 | } 147 | } 148 | layer { 149 | name: "res48_eletwise" 150 | type: "Eltwise" 151 | bottom: "res48_match_conv" 152 | bottom: "res48_conv3" 153 | top: "res48_eletwise" 154 | eltwise_param { 155 | operation: SUM 156 | } 157 | } 158 | 159 | layer { 160 | name: "res49_scale" 161 | type: "Scale" 162 | bottom: "res48_eletwise" 163 | top: "res49_scale" 164 | scale_param { 165 | bias_term: true 166 | } 167 | param { 168 | lr_mult: 0.0 169 | decay_mult: 0.0 170 | } 171 | param { 172 | lr_mult: 0.0 173 | decay_mult: 0.0 174 | } 175 | } 176 | layer { 177 | name: "res49_relu" 178 | type: "ReLU" 179 | bottom: "res49_scale" 180 | top: "res49_scale" 181 | } 182 | layer { 183 | name: "res49_conv1" 184 | type: "Convolution" 185 | bottom: "res49_scale" 186 | top: "res49_conv1" 187 | param { 188 | lr_mult: 1 189 | decay_mult: 1 190 | } 191 | convolution_param { 192 | bias_term: false 193 | num_output: 512 194 | pad: 0 195 | kernel_size: 1 196 | stride: 1 197 | } 198 | } 199 | 200 | layer { 201 | name: "res49_conv1_scale" 202 | type: "Scale" 203 | bottom: "res49_conv1" 204 | top: "res49_conv1" 205 | scale_param { 206 | bias_term: true 207 | } 208 | param { 209 | lr_mult: 0.0 210 | decay_mult: 0.0 211 | } 212 | param { 213 | lr_mult: 0.0 214 | decay_mult: 0.0 215 | } 216 | } 217 | layer { 218 | name: "res49_conv1_relu" 219 | type: "ReLU" 220 | bottom: "res49_conv1" 221 | top: "res49_conv1" 222 | } 223 | layer { 224 | name: "res49_conv2" 225 | 
type: "Convolution" 226 | bottom: "res49_conv1" 227 | top: "res49_conv2" 228 | param { 229 | lr_mult: 1 230 | decay_mult: 1 231 | } 232 | convolution_param { 233 | bias_term: false 234 | num_output: 512 235 | pad: 1 236 | kernel_size: 3 237 | stride: 1 238 | } 239 | } 240 | layer { 241 | name: "res49_conv2_scale" 242 | type: "Scale" 243 | bottom: "res49_conv2" 244 | top: "res49_conv2" 245 | scale_param { 246 | bias_term: true 247 | } 248 | param { 249 | lr_mult: 0.0 250 | decay_mult: 0.0 251 | } 252 | param { 253 | lr_mult: 0.0 254 | decay_mult: 0.0 255 | } 256 | } 257 | layer { 258 | name: "res49_conv2_relu" 259 | type: "ReLU" 260 | bottom: "res49_conv2" 261 | top: "res49_conv2" 262 | } 263 | layer { 264 | name: "res49_conv3" 265 | type: "Convolution" 266 | bottom: "res49_conv2" 267 | top: "res49_conv3" 268 | param { 269 | lr_mult: 1 270 | decay_mult: 1 271 | } 272 | convolution_param { 273 | bias_term: false 274 | num_output: 2048 275 | pad: 0 276 | kernel_size: 1 277 | stride: 1 278 | } 279 | } 280 | layer { 281 | name: "res49_eletwise" 282 | type: "Eltwise" 283 | bottom: "res48_eletwise" 284 | bottom: "res49_conv3" 285 | top: "res49_eletwise" 286 | eltwise_param { 287 | operation: SUM 288 | } 289 | } 290 | 291 | layer { 292 | name: "res50_scale" 293 | type: "Scale" 294 | bottom: "res49_eletwise" 295 | top: "res50_scale" 296 | scale_param { 297 | bias_term: true 298 | } 299 | param { 300 | lr_mult: 0.0 301 | decay_mult: 0.0 302 | } 303 | param { 304 | lr_mult: 0.0 305 | decay_mult: 0.0 306 | } 307 | } 308 | layer { 309 | name: "res50_relu" 310 | type: "ReLU" 311 | bottom: "res50_scale" 312 | top: "res50_scale" 313 | } 314 | layer { 315 | name: "res50_conv1" 316 | type: "Convolution" 317 | bottom: "res50_scale" 318 | top: "res50_conv1" 319 | param { 320 | lr_mult: 1 321 | decay_mult: 1 322 | } 323 | convolution_param { 324 | bias_term: false 325 | num_output: 512 326 | pad: 0 327 | kernel_size: 1 328 | stride: 1 329 | } 330 | } 331 | 332 | layer { 333 | name: "res50_conv1_scale" 334 | type: "Scale" 335 | bottom: "res50_conv1" 336 | top: "res50_conv1" 337 | scale_param { 338 | bias_term: true 339 | } 340 | param { 341 | lr_mult: 0.0 342 | decay_mult: 0.0 343 | } 344 | param { 345 | lr_mult: 0.0 346 | decay_mult: 0.0 347 | } 348 | } 349 | layer { 350 | name: "res50_conv1_relu" 351 | type: "ReLU" 352 | bottom: "res50_conv1" 353 | top: "res50_conv1" 354 | } 355 | layer { 356 | name: "res50_conv2" 357 | type: "Convolution" 358 | bottom: "res50_conv1" 359 | top: "res50_conv2" 360 | param { 361 | lr_mult: 1 362 | decay_mult: 1 363 | } 364 | convolution_param { 365 | bias_term: false 366 | num_output: 512 367 | pad: 1 368 | kernel_size: 3 369 | stride: 1 370 | } 371 | } 372 | 373 | layer { 374 | name: "res50_conv2_scale" 375 | type: "Scale" 376 | bottom: "res50_conv2" 377 | top: "res50_conv2" 378 | scale_param { 379 | bias_term: true 380 | } 381 | param { 382 | lr_mult: 0.0 383 | decay_mult: 0.0 384 | } 385 | param { 386 | lr_mult: 0.0 387 | decay_mult: 0.0 388 | } 389 | } 390 | layer { 391 | name: "res50_conv2_relu" 392 | type: "ReLU" 393 | bottom: "res50_conv2" 394 | top: "res50_conv2" 395 | } 396 | layer { 397 | name: "res50_conv3" 398 | type: "Convolution" 399 | bottom: "res50_conv2" 400 | top: "res50_conv3" 401 | param { 402 | lr_mult: 1 403 | decay_mult: 1 404 | } 405 | convolution_param { 406 | bias_term: false 407 | num_output: 2048 408 | pad: 0 409 | kernel_size: 1 410 | stride: 1 411 | } 412 | } 413 | layer { 414 | name: "res50_eletwise" 415 | type: "Eltwise" 416 | bottom: 
"res49_eletwise" 417 | bottom: "res50_conv3" 418 | top: "res50_eletwise" 419 | eltwise_param { 420 | operation: SUM 421 | } 422 | } 423 | 424 | layer { 425 | name: "res50_eletwise_scale" 426 | type: "Scale" 427 | bottom: "res50_eletwise" 428 | top: "res50_eletwise_scale" 429 | scale_param { 430 | bias_term: true 431 | } 432 | param { 433 | lr_mult: 0.0 434 | decay_mult: 0.0 435 | } 436 | param { 437 | lr_mult: 0.0 438 | decay_mult: 0.0 439 | } 440 | } 441 | layer { 442 | name: "res50_eletwise_relu" 443 | type: "ReLU" 444 | bottom: "res50_eletwise_scale" 445 | top: "res50_eletwise_scale" 446 | } 447 | layer { 448 | name: "pool5" 449 | type: "Pooling" 450 | bottom: "res50_eletwise_scale" 451 | top: "pool5" 452 | pooling_param { 453 | pool: AVE 454 | global_pooling: true 455 | } 456 | } 457 | layer { 458 | name: "cls_score" 459 | type: "InnerProduct" 460 | bottom: "pool5" 461 | top: "cls_score" 462 | param { 463 | lr_mult: 1 464 | decay_mult: 1 465 | } 466 | param { 467 | lr_mult: 2 468 | decay_mult: 0 469 | } 470 | inner_product_param { 471 | num_output: 21 472 | weight_filler { 473 | type: "msra" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | value: 0 479 | } 480 | } 481 | } 482 | layer { 483 | name: "bbox_pred" 484 | type: "InnerProduct" 485 | bottom: "pool5" 486 | top: "bbox_pred" 487 | param { 488 | lr_mult: 1 489 | decay_mult: 1 490 | } 491 | param { 492 | lr_mult: 2 493 | decay_mult: 0 494 | } 495 | inner_product_param { 496 | num_output: 84 497 | weight_filler { 498 | type: "msra" 499 | std: 0.01 500 | } 501 | bias_filler { 502 | type: "constant" 503 | value: 0 504 | } 505 | } 506 | } 507 | layer { 508 | name: "cls_prob" 509 | type: "Softmax" 510 | bottom: "cls_score" 511 | top: "cls_prob" 512 | } 513 | 514 | --------------------------------------------------------------------------------