├── .gitattributes
├── .gitignore
├── MobileNet-YOLO
    ├── mobilenet_yolov3_deploy.prototxt
    └── mobilenet_yolov3_deploy_iter_63000.caffemodel
├── README.md
├── SSD
    ├── MobileNet
    │   ├── MobileNetSSD_deploy.prototxt
    │   ├── MobileNetSSD_deploy2.prototxt
    │   ├── MobileNetSSD_deploy_custom.prototxt
    │   ├── solver.prototxt
    │   ├── solver_test.prototxt
    │   ├── test.prototxt
    │   └── train.prototxt
    └── MobileNet_V2
    │   ├── deploy.prototxt
    │   ├── solver.prototxt
    │   └── train.prototxt
├── YOLO
    ├── voc.data
    ├── voc.names
    ├── yolov3-tiny.cfg
    ├── yolov3-tiny_final.weights
    └── yolov3.cfg
└── faster_rcnn_end2end_avs
    ├── VGG19
        └── faster_rcnn_end2end
        │   ├── solver.prototxt
        │   ├── test.prototxt
        │   └── train.prototxt
    ├── solver.prototxt
    ├── test.prototxt
    └── train.prototxt


/.gitattributes:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Set default behavior to automatically normalize line endings.
 3 | ###############################################################################
 4 | * text=auto
 5 | 
 6 | ###############################################################################
 7 | # Set default behavior for command prompt diff.
 8 | #
 9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs     diff=csharp
14 | 
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following 
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln       merge=binary
26 | #*.csproj    merge=binary
27 | #*.vbproj    merge=binary
28 | #*.vcxproj   merge=binary
29 | #*.vcproj    merge=binary
30 | #*.dbproj    merge=binary
31 | #*.fsproj    merge=binary
32 | #*.lsproj    merge=binary
33 | #*.wixproj   merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj   merge=binary
36 | #*.wwaproj   merge=binary
37 | 
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg   binary
44 | #*.png   binary
45 | #*.gif   binary
46 | 
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | # 
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the 
52 | # entries below.
53 | ###############################################################################
54 | #*.doc   diff=astextplain
55 | #*.DOC   diff=astextplain
56 | #*.docx  diff=astextplain
57 | #*.DOCX  diff=astextplain
58 | #*.dot   diff=astextplain
59 | #*.DOT   diff=astextplain
60 | #*.pdf   diff=astextplain
61 | #*.PDF   diff=astextplain
62 | #*.rtf   diff=astextplain
63 | #*.RTF   diff=astextplain
64 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## General
2 | 
3 | # Models and weights
4 | 
5 | #*.caffemodel
6 | *.solverstate
7 | *.py
8 | 


--------------------------------------------------------------------------------
/MobileNet-YOLO/mobilenet_yolov3_deploy_iter_63000.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eric612/Vehicle-Detection/caedb24b289b1c4774b85ecc15f60cf6b040bec6/MobileNet-YOLO/mobilenet_yolov3_deploy_iter_63000.caffemodel


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Vehicle Detection 
  2 | 
  3 | ## Comparing different models on the same dataset
  4 | 
  5 | 1. [MobileNet-YOLO](https://github.com/eric612/MobileNet-YOLO)
  6 | 
  7 | 2. [YoloV3](https://pjreddie.com/darknet/yolo/)
  8 | 
  9 | 3. [FasterRCNN](https://github.com/intel/caffe)
 10 | 
 11 | 4. [MobileNet(V2) SSD](https://github.com/eric612/MobileNet-SSD-windows)
 12 | 
 13 | ### New !! Detection and Segmentation
 14 | 
 15 | Detection and Segmentation in one-stage end-to-end [models](http://ethereon.github.io/netscope/#/gist/4e0f9e4840f0cced223ee60c31772b68)
 16 | 
 17 | [![Detection and segmentation demo video](https://img.youtube.com/vi/rHMW8gkbe6s/0.jpg)](https://www.youtube.com/watch?v=rHMW8gkbe6s)
 18 | 
 19 | [Training project](https://github.com/eric612/MobileNet-YOLO/blob/detection_with_segmentation/README.md)
 20 | 
 21 | ### MobileNet-YOLO Result
 22 | 
 23 | [Run on linux](https://github.com/eric612/MobileNet-YOLO)
 24 | 
 25 | [Run on windows](https://github.com/eric612/Caffe-YOLOv2-Windows)
 26 | 
 27 | [Models and Weights](/MobileNet-YOLO)
 28 | 
 29 | [![MobileNet-YOLO result video](https://img.youtube.com/vi/moW84z9zKOg/0.jpg)](https://www.youtube.com/watch?v=moW84z9zKOg)
 30 | 
 31 | ### YOLOv3 Tiny Result
 32 | 
 33 | [![YOLOv3 Tiny result video 1](https://img.youtube.com/vi/rA5nHltRGRE/0.jpg)](https://www.youtube.com/watch?v=rA5nHltRGRE)
 34 | 
 35 | [![YOLOv3 Tiny result video 2](https://img.youtube.com/vi/KSsnJuSrWMw/0.jpg)](https://www.youtube.com/watch?v=KSsnJuSrWMw)
 36 | 
 37 | ### YOLOv3 Tiny Model 
 38 | 
 39 | [weights](/YOLO/yolov3-tiny_final.weights)
 40 | 
 41 | [model](/YOLO/yolov3-tiny.cfg)
 42 | 
 43 | ### YOLOv3-416x416-full Result
 44 | 
 45 | [![YOLOv3-full](https://img.youtube.com/vi/YYz58loXJU0/0.jpg)](https://www.youtube.com/watch?v=YYz58loXJU0)
 46 | 
 47 | ### YOLOv3-416x416-full Model
 48 | 
 49 | [weights](https://drive.google.com/open?id=1BIBiZmTQOuqV2yQ1S67FSMCzRTmzThon)
 50 | 
 51 | [model](/YOLO/yolov3.cfg)
 52 | 
 53 | ### MobileNetSSD Model
 54 | 
 55 | [weights](https://drive.google.com/open?id=1LbLSTPFSlHML5qAUYN-kt1bw2HxvvNWS)
 56 | 
 57 | [model](https://drive.google.com/open?id=1KOE5r-71FFWU0LZbpo9HMEUwM_RE1LHR)
 58 | 
 59 | ### MobileNetSSD_V2 Model
 60 | 
 61 | [weights](https://drive.google.com/open?id=1v5X4tCaMFa59cfS4Ksr1J-Fl40vKlfpl)
 62 | 
 63 | [model](https://github.com/eric612/MobileNet-SSD-windows/blob/master/models/MobileNetV2/deploy.prototxt)
 64 | 
 65 | ### MobileNetSSD Result 
 66 | 
 67 | [![MobileNetSSD result video](https://img.youtube.com/vi/jn6SOzT_wPA/0.jpg)](https://www.youtube.com/watch?v=jn6SOzT_wPA)
 68 | 
 69 | ### MobileNetSSD_V2 Result 
 70 | 
 71 | [![MobileNetSSD_V2 result video 1](https://img.youtube.com/vi/oc3tXxOoSH4/0.jpg)](https://www.youtube.com/watch?v=oc3tXxOoSH4)
 72 | 
 73 | [![MobileNetSSD_V2 result video 2](https://img.youtube.com/vi/dsdeU8B0CJE/0.jpg)](https://www.youtube.com/watch?v=dsdeU8B0CJE)
 74 | 
 75 | ### FasterRCNN Model
 76 | 
 77 | [VGG16](https://drive.google.com/open?id=1NQ9F74FTZnXM-hyuwYAoDBOYBjDSf5bp)
 78 | 
 79 | [VGG19](https://drive.google.com/open?id=1FiSktKooiABZJB5UIun9tAmD5aTEAHxn)
 80 | 
 81 | ### FasterRCNN Result 
 82 | 
 83 | #### VGG19
 84 | 
 85 | [![FasterRCNN VGG19 result video 1](https://img.youtube.com/vi/NhID_pNwgac/0.jpg)](https://www.youtube.com/watch?v=NhID_pNwgac)
 86 | 
 87 | [![FasterRCNN VGG19 result video 2](https://img.youtube.com/vi/AjnaTelt0KM/0.jpg)](https://www.youtube.com/watch?v=AjnaTelt0KM)
 88 | 
 89 | [![FasterRCNN VGG19 result video 3](https://img.youtube.com/vi/vxk77dicGAQ/0.jpg)](https://www.youtube.com/watch?v=vxk77dicGAQ)
 90 | 
 91 | #### VGG16
 92 | 
 93 | [![FasterRCNN VGG16 result video](https://img.youtube.com/vi/xjIB9t1tLOg/0.jpg)](https://www.youtube.com/watch?v=xjIB9t1tLOg)
 94 | 
 95 | 
 96 | 
 97 | ### Source Video
 98 | 
 99 | [street](https://drive.google.com/open?id=1Wkk_n_yXz0C8nuwPK_1B8bJVJf7vzWeB)
100 | 
101 | [tunnel](https://drive.google.com/open?id=1TDdBmZDtm_02WCLM3ENvnNnm6jUmeJFu)
102 | 
103 | [rear view](https://drive.google.com/open?id=1pJQ4F1jqUHywI9bw0r2f3R-xyJOghujm)
104 | 
105 | [night 1](https://drive.google.com/open?id=1r2QyxgAy_dt_30rWZXcHG5TE0zltXdkY)
106 | 
107 | [night 2](https://drive.google.com/open?id=11NKTl15IPMdowTZnVVW8_HAYSdpTeJU-)
108 | 
109 |  
110 | 


--------------------------------------------------------------------------------
/SSD/MobileNet/MobileNetSSD_deploy.prototxt:
--------------------------------------------------------------------------------
   1 | name: "MobileNet-SSD"
   2 | input: "data"
   3 | input_shape {
   4 |   dim: 1
   5 |   dim: 3
   6 |   dim: 300
   7 |   dim: 300
   8 | }
   9 | layer {
  10 |   name: "conv0"
  11 |   type: "Convolution"
  12 |   bottom: "data"
  13 |   top: "conv0"
  14 |   param {
  15 |     lr_mult: 1.0
  16 |     decay_mult: 1.0
  17 |   }
  18 |   param {
  19 |     lr_mult: 2.0
  20 |     decay_mult: 0.0
  21 |   }
  22 |   convolution_param {
  23 |     num_output: 32
  24 |     pad: 1
  25 |     kernel_size: 3
  26 |     stride: 2
  27 |     weight_filler {
  28 |       type: "msra"
  29 |     }
  30 |     bias_filler {
  31 |       type: "constant"
  32 |       value: 0.0
  33 |     }
  34 |   }
  35 | }
  36 | layer {
  37 |   name: "conv0/relu"
  38 |   type: "ReLU"
  39 |   bottom: "conv0"
  40 |   top: "conv0"
  41 | }
  42 | layer {
  43 |   name: "conv1/dw"
  44 |   type: "Convolution"
  45 |   bottom: "conv0"
  46 |   top: "conv1/dw"
  47 |   param {
  48 |     lr_mult: 1.0
  49 |     decay_mult: 1.0
  50 |   }
  51 |   param {
  52 |     lr_mult: 2.0
  53 |     decay_mult: 0.0
  54 |   }
  55 |   convolution_param {
  56 |     num_output: 32
  57 |     pad: 1
  58 |     kernel_size: 3
  59 |     group: 32
  60 |     engine: CAFFE
  61 |     weight_filler {
  62 |       type: "msra"
  63 |     }
  64 |     bias_filler {
  65 |       type: "constant"
  66 |       value: 0.0
  67 |     }
  68 |   }
  69 | }
  70 | layer {
  71 |   name: "conv1/dw/relu"
  72 |   type: "ReLU"
  73 |   bottom: "conv1/dw"
  74 |   top: "conv1/dw"
  75 | }
  76 | layer {
  77 |   name: "conv1"
  78 |   type: "Convolution"
  79 |   bottom: "conv1/dw"
  80 |   top: "conv1"
  81 |   param {
  82 |     lr_mult: 1.0
  83 |     decay_mult: 1.0
  84 |   }
  85 |   param {
  86 |     lr_mult: 2.0
  87 |     decay_mult: 0.0
  88 |   }
  89 |   convolution_param {
  90 |     num_output: 64
  91 |     kernel_size: 1
  92 |     weight_filler {
  93 |       type: "msra"
  94 |     }
  95 |     bias_filler {
  96 |       type: "constant"
  97 |       value: 0.0
  98 |     }
  99 |   }
 100 | }
 101 | layer {
 102 |   name: "conv1/relu"
 103 |   type: "ReLU"
 104 |   bottom: "conv1"
 105 |   top: "conv1"
 106 | }
 107 | layer {
 108 |   name: "conv2/dw"
 109 |   type: "Convolution"
 110 |   bottom: "conv1"
 111 |   top: "conv2/dw"
 112 |   param {
 113 |     lr_mult: 1.0
 114 |     decay_mult: 1.0
 115 |   }
 116 |   param {
 117 |     lr_mult: 2.0
 118 |     decay_mult: 0.0
 119 |   }
 120 |   convolution_param {
 121 |     num_output: 64
 122 |     pad: 1
 123 |     kernel_size: 3
 124 |     stride: 2
 125 |     group: 64
 126 |     engine: CAFFE
 127 |     weight_filler {
 128 |       type: "msra"
 129 |     }
 130 |     bias_filler {
 131 |       type: "constant"
 132 |       value: 0.0
 133 |     }
 134 |   }
 135 | }
 136 | layer {
 137 |   name: "conv2/dw/relu"
 138 |   type: "ReLU"
 139 |   bottom: "conv2/dw"
 140 |   top: "conv2/dw"
 141 | }
 142 | layer {
 143 |   name: "conv2"
 144 |   type: "Convolution"
 145 |   bottom: "conv2/dw"
 146 |   top: "conv2"
 147 |   param {
 148 |     lr_mult: 1.0
 149 |     decay_mult: 1.0
 150 |   }
 151 |   param {
 152 |     lr_mult: 2.0
 153 |     decay_mult: 0.0
 154 |   }
 155 |   convolution_param {
 156 |     num_output: 128
 157 |     kernel_size: 1
 158 |     weight_filler {
 159 |       type: "msra"
 160 |     }
 161 |     bias_filler {
 162 |       type: "constant"
 163 |       value: 0.0
 164 |     }
 165 |   }
 166 | }
 167 | layer {
 168 |   name: "conv2/relu"
 169 |   type: "ReLU"
 170 |   bottom: "conv2"
 171 |   top: "conv2"
 172 | }
 173 | layer {
 174 |   name: "conv3/dw"
 175 |   type: "Convolution"
 176 |   bottom: "conv2"
 177 |   top: "conv3/dw"
 178 |   param {
 179 |     lr_mult: 1.0
 180 |     decay_mult: 1.0
 181 |   }
 182 |   param {
 183 |     lr_mult: 2.0
 184 |     decay_mult: 0.0
 185 |   }
 186 |   convolution_param {
 187 |     num_output: 128
 188 |     pad: 1
 189 |     kernel_size: 3
 190 |     group: 128
 191 |     engine: CAFFE
 192 |     weight_filler {
 193 |       type: "msra"
 194 |     }
 195 |     bias_filler {
 196 |       type: "constant"
 197 |       value: 0.0
 198 |     }
 199 |   }
 200 | }
 201 | layer {
 202 |   name: "conv3/dw/relu"
 203 |   type: "ReLU"
 204 |   bottom: "conv3/dw"
 205 |   top: "conv3/dw"
 206 | }
 207 | layer {
 208 |   name: "conv3"
 209 |   type: "Convolution"
 210 |   bottom: "conv3/dw"
 211 |   top: "conv3"
 212 |   param {
 213 |     lr_mult: 1.0
 214 |     decay_mult: 1.0
 215 |   }
 216 |   param {
 217 |     lr_mult: 2.0
 218 |     decay_mult: 0.0
 219 |   }
 220 |   convolution_param {
 221 |     num_output: 128
 222 |     kernel_size: 1
 223 |     weight_filler {
 224 |       type: "msra"
 225 |     }
 226 |     bias_filler {
 227 |       type: "constant"
 228 |       value: 0.0
 229 |     }
 230 |   }
 231 | }
 232 | layer {
 233 |   name: "conv3/relu"
 234 |   type: "ReLU"
 235 |   bottom: "conv3"
 236 |   top: "conv3"
 237 | }
 238 | layer {
 239 |   name: "conv4/dw"
 240 |   type: "Convolution"
 241 |   bottom: "conv3"
 242 |   top: "conv4/dw"
 243 |   param {
 244 |     lr_mult: 1.0
 245 |     decay_mult: 1.0
 246 |   }
 247 |   param {
 248 |     lr_mult: 2.0
 249 |     decay_mult: 0.0
 250 |   }
 251 |   convolution_param {
 252 |     num_output: 128
 253 |     pad: 1
 254 |     kernel_size: 3
 255 |     stride: 2
 256 |     group: 128
 257 |     engine: CAFFE
 258 |     weight_filler {
 259 |       type: "msra"
 260 |     }
 261 |     bias_filler {
 262 |       type: "constant"
 263 |       value: 0.0
 264 |     }
 265 |   }
 266 | }
 267 | layer {
 268 |   name: "conv4/dw/relu"
 269 |   type: "ReLU"
 270 |   bottom: "conv4/dw"
 271 |   top: "conv4/dw"
 272 | }
 273 | layer {
 274 |   name: "conv4"
 275 |   type: "Convolution"
 276 |   bottom: "conv4/dw"
 277 |   top: "conv4"
 278 |   param {
 279 |     lr_mult: 1.0
 280 |     decay_mult: 1.0
 281 |   }
 282 |   param {
 283 |     lr_mult: 2.0
 284 |     decay_mult: 0.0
 285 |   }
 286 |   convolution_param {
 287 |     num_output: 256
 288 |     kernel_size: 1
 289 |     weight_filler {
 290 |       type: "msra"
 291 |     }
 292 |     bias_filler {
 293 |       type: "constant"
 294 |       value: 0.0
 295 |     }
 296 |   }
 297 | }
 298 | layer {
 299 |   name: "conv4/relu"
 300 |   type: "ReLU"
 301 |   bottom: "conv4"
 302 |   top: "conv4"
 303 | }
 304 | layer {
 305 |   name: "conv5/dw"
 306 |   type: "Convolution"
 307 |   bottom: "conv4"
 308 |   top: "conv5/dw"
 309 |   param {
 310 |     lr_mult: 1.0
 311 |     decay_mult: 1.0
 312 |   }
 313 |   param {
 314 |     lr_mult: 2.0
 315 |     decay_mult: 0.0
 316 |   }
 317 |   convolution_param {
 318 |     num_output: 256
 319 |     pad: 1
 320 |     kernel_size: 3
 321 |     group: 256
 322 |     engine: CAFFE
 323 |     weight_filler {
 324 |       type: "msra"
 325 |     }
 326 |     bias_filler {
 327 |       type: "constant"
 328 |       value: 0.0
 329 |     }
 330 |   }
 331 | }
 332 | layer {
 333 |   name: "conv5/dw/relu"
 334 |   type: "ReLU"
 335 |   bottom: "conv5/dw"
 336 |   top: "conv5/dw"
 337 | }
 338 | layer {
 339 |   name: "conv5"
 340 |   type: "Convolution"
 341 |   bottom: "conv5/dw"
 342 |   top: "conv5"
 343 |   param {
 344 |     lr_mult: 1.0
 345 |     decay_mult: 1.0
 346 |   }
 347 |   param {
 348 |     lr_mult: 2.0
 349 |     decay_mult: 0.0
 350 |   }
 351 |   convolution_param {
 352 |     num_output: 256
 353 |     kernel_size: 1
 354 |     weight_filler {
 355 |       type: "msra"
 356 |     }
 357 |     bias_filler {
 358 |       type: "constant"
 359 |       value: 0.0
 360 |     }
 361 |   }
 362 | }
 363 | layer {
 364 |   name: "conv5/relu"
 365 |   type: "ReLU"
 366 |   bottom: "conv5"
 367 |   top: "conv5"
 368 | }
 369 | layer {
 370 |   name: "conv6/dw"
 371 |   type: "Convolution"
 372 |   bottom: "conv5"
 373 |   top: "conv6/dw"
 374 |   param {
 375 |     lr_mult: 1.0
 376 |     decay_mult: 1.0
 377 |   }
 378 |   param {
 379 |     lr_mult: 2.0
 380 |     decay_mult: 0.0
 381 |   }
 382 |   convolution_param {
 383 |     num_output: 256
 384 |     pad: 1
 385 |     kernel_size: 3
 386 |     stride: 2
 387 |     group: 256
 388 |     engine: CAFFE
 389 |     weight_filler {
 390 |       type: "msra"
 391 |     }
 392 |     bias_filler {
 393 |       type: "constant"
 394 |       value: 0.0
 395 |     }
 396 |   }
 397 | }
 398 | layer {
 399 |   name: "conv6/dw/relu"
 400 |   type: "ReLU"
 401 |   bottom: "conv6/dw"
 402 |   top: "conv6/dw"
 403 | }
 404 | layer {
 405 |   name: "conv6"
 406 |   type: "Convolution"
 407 |   bottom: "conv6/dw"
 408 |   top: "conv6"
 409 |   param {
 410 |     lr_mult: 1.0
 411 |     decay_mult: 1.0
 412 |   }
 413 |   param {
 414 |     lr_mult: 2.0
 415 |     decay_mult: 0.0
 416 |   }
 417 |   convolution_param {
 418 |     num_output: 512
 419 |     kernel_size: 1
 420 |     weight_filler {
 421 |       type: "msra"
 422 |     }
 423 |     bias_filler {
 424 |       type: "constant"
 425 |       value: 0.0
 426 |     }
 427 |   }
 428 | }
 429 | layer {
 430 |   name: "conv6/relu"
 431 |   type: "ReLU"
 432 |   bottom: "conv6"
 433 |   top: "conv6"
 434 | }
 435 | layer {
 436 |   name: "conv7/dw"
 437 |   type: "Convolution"
 438 |   bottom: "conv6"
 439 |   top: "conv7/dw"
 440 |   param {
 441 |     lr_mult: 1.0
 442 |     decay_mult: 1.0
 443 |   }
 444 |   param {
 445 |     lr_mult: 2.0
 446 |     decay_mult: 0.0
 447 |   }
 448 |   convolution_param {
 449 |     num_output: 512
 450 |     pad: 1
 451 |     kernel_size: 3
 452 |     group: 512
 453 |     engine: CAFFE
 454 |     weight_filler {
 455 |       type: "msra"
 456 |     }
 457 |     bias_filler {
 458 |       type: "constant"
 459 |       value: 0.0
 460 |     }
 461 |   }
 462 | }
 463 | layer {
 464 |   name: "conv7/dw/relu"
 465 |   type: "ReLU"
 466 |   bottom: "conv7/dw"
 467 |   top: "conv7/dw"
 468 | }
 469 | layer {
 470 |   name: "conv7"
 471 |   type: "Convolution"
 472 |   bottom: "conv7/dw"
 473 |   top: "conv7"
 474 |   param {
 475 |     lr_mult: 1.0
 476 |     decay_mult: 1.0
 477 |   }
 478 |   param {
 479 |     lr_mult: 2.0
 480 |     decay_mult: 0.0
 481 |   }
 482 |   convolution_param {
 483 |     num_output: 512
 484 |     kernel_size: 1
 485 |     weight_filler {
 486 |       type: "msra"
 487 |     }
 488 |     bias_filler {
 489 |       type: "constant"
 490 |       value: 0.0
 491 |     }
 492 |   }
 493 | }
 494 | layer {
 495 |   name: "conv7/relu"
 496 |   type: "ReLU"
 497 |   bottom: "conv7"
 498 |   top: "conv7"
 499 | }
 500 | layer {
 501 |   name: "conv8/dw"
 502 |   type: "Convolution"
 503 |   bottom: "conv7"
 504 |   top: "conv8/dw"
 505 |   param {
 506 |     lr_mult: 1.0
 507 |     decay_mult: 1.0
 508 |   }
 509 |   param {
 510 |     lr_mult: 2.0
 511 |     decay_mult: 0.0
 512 |   }
 513 |   convolution_param {
 514 |     num_output: 512
 515 |     pad: 1
 516 |     kernel_size: 3
 517 |     group: 512
 518 |     engine: CAFFE
 519 |     weight_filler {
 520 |       type: "msra"
 521 |     }
 522 |     bias_filler {
 523 |       type: "constant"
 524 |       value: 0.0
 525 |     }
 526 |   }
 527 | }
 528 | layer {
 529 |   name: "conv8/dw/relu"
 530 |   type: "ReLU"
 531 |   bottom: "conv8/dw"
 532 |   top: "conv8/dw"
 533 | }
 534 | layer {
 535 |   name: "conv8"
 536 |   type: "Convolution"
 537 |   bottom: "conv8/dw"
 538 |   top: "conv8"
 539 |   param {
 540 |     lr_mult: 1.0
 541 |     decay_mult: 1.0
 542 |   }
 543 |   param {
 544 |     lr_mult: 2.0
 545 |     decay_mult: 0.0
 546 |   }
 547 |   convolution_param {
 548 |     num_output: 512
 549 |     kernel_size: 1
 550 |     weight_filler {
 551 |       type: "msra"
 552 |     }
 553 |     bias_filler {
 554 |       type: "constant"
 555 |       value: 0.0
 556 |     }
 557 |   }
 558 | }
 559 | layer {
 560 |   name: "conv8/relu"
 561 |   type: "ReLU"
 562 |   bottom: "conv8"
 563 |   top: "conv8"
 564 | }
 565 | layer {
 566 |   name: "conv9/dw"
 567 |   type: "Convolution"
 568 |   bottom: "conv8"
 569 |   top: "conv9/dw"
 570 |   param {
 571 |     lr_mult: 1.0
 572 |     decay_mult: 1.0
 573 |   }
 574 |   param {
 575 |     lr_mult: 2.0
 576 |     decay_mult: 0.0
 577 |   }
 578 |   convolution_param {
 579 |     num_output: 512
 580 |     pad: 1
 581 |     kernel_size: 3
 582 |     group: 512
 583 |     engine: CAFFE
 584 |     weight_filler {
 585 |       type: "msra"
 586 |     }
 587 |     bias_filler {
 588 |       type: "constant"
 589 |       value: 0.0
 590 |     }
 591 |   }
 592 | }
 593 | layer {
 594 |   name: "conv9/dw/relu"
 595 |   type: "ReLU"
 596 |   bottom: "conv9/dw"
 597 |   top: "conv9/dw"
 598 | }
 599 | layer {
 600 |   name: "conv9"
 601 |   type: "Convolution"
 602 |   bottom: "conv9/dw"
 603 |   top: "conv9"
 604 |   param {
 605 |     lr_mult: 1.0
 606 |     decay_mult: 1.0
 607 |   }
 608 |   param {
 609 |     lr_mult: 2.0
 610 |     decay_mult: 0.0
 611 |   }
 612 |   convolution_param {
 613 |     num_output: 512
 614 |     kernel_size: 1
 615 |     weight_filler {
 616 |       type: "msra"
 617 |     }
 618 |     bias_filler {
 619 |       type: "constant"
 620 |       value: 0.0
 621 |     }
 622 |   }
 623 | }
 624 | layer {
 625 |   name: "conv9/relu"
 626 |   type: "ReLU"
 627 |   bottom: "conv9"
 628 |   top: "conv9"
 629 | }
 630 | layer {
 631 |   name: "conv10/dw"
 632 |   type: "Convolution"
 633 |   bottom: "conv9"
 634 |   top: "conv10/dw"
 635 |   param {
 636 |     lr_mult: 1.0
 637 |     decay_mult: 1.0
 638 |   }
 639 |   param {
 640 |     lr_mult: 2.0
 641 |     decay_mult: 0.0
 642 |   }
 643 |   convolution_param {
 644 |     num_output: 512
 645 |     pad: 1
 646 |     kernel_size: 3
 647 |     group: 512
 648 |     engine: CAFFE
 649 |     weight_filler {
 650 |       type: "msra"
 651 |     }
 652 |     bias_filler {
 653 |       type: "constant"
 654 |       value: 0.0
 655 |     }
 656 |   }
 657 | }
 658 | layer {
 659 |   name: "conv10/dw/relu"
 660 |   type: "ReLU"
 661 |   bottom: "conv10/dw"
 662 |   top: "conv10/dw"
 663 | }
 664 | layer {
 665 |   name: "conv10"
 666 |   type: "Convolution"
 667 |   bottom: "conv10/dw"
 668 |   top: "conv10"
 669 |   param {
 670 |     lr_mult: 1.0
 671 |     decay_mult: 1.0
 672 |   }
 673 |   param {
 674 |     lr_mult: 2.0
 675 |     decay_mult: 0.0
 676 |   }
 677 |   convolution_param {
 678 |     num_output: 512
 679 |     kernel_size: 1
 680 |     weight_filler {
 681 |       type: "msra"
 682 |     }
 683 |     bias_filler {
 684 |       type: "constant"
 685 |       value: 0.0
 686 |     }
 687 |   }
 688 | }
 689 | layer {
 690 |   name: "conv10/relu"
 691 |   type: "ReLU"
 692 |   bottom: "conv10"
 693 |   top: "conv10"
 694 | }
 695 | layer {
 696 |   name: "conv11/dw"
 697 |   type: "Convolution"
 698 |   bottom: "conv10"
 699 |   top: "conv11/dw"
 700 |   param {
 701 |     lr_mult: 1.0
 702 |     decay_mult: 1.0
 703 |   }
 704 |   param {
 705 |     lr_mult: 2.0
 706 |     decay_mult: 0.0
 707 |   }
 708 |   convolution_param {
 709 |     num_output: 512
 710 |     pad: 1
 711 |     kernel_size: 3
 712 |     group: 512
 713 |     engine: CAFFE
 714 |     weight_filler {
 715 |       type: "msra"
 716 |     }
 717 |     bias_filler {
 718 |       type: "constant"
 719 |       value: 0.0
 720 |     }
 721 |   }
 722 | }
 723 | layer {
 724 |   name: "conv11/dw/relu"
 725 |   type: "ReLU"
 726 |   bottom: "conv11/dw"
 727 |   top: "conv11/dw"
 728 | }
 729 | layer {
 730 |   name: "conv11"
 731 |   type: "Convolution"
 732 |   bottom: "conv11/dw"
 733 |   top: "conv11"
 734 |   param {
 735 |     lr_mult: 1.0
 736 |     decay_mult: 1.0
 737 |   }
 738 |   param {
 739 |     lr_mult: 2.0
 740 |     decay_mult: 0.0
 741 |   }
 742 |   convolution_param {
 743 |     num_output: 512
 744 |     kernel_size: 1
 745 |     weight_filler {
 746 |       type: "msra"
 747 |     }
 748 |     bias_filler {
 749 |       type: "constant"
 750 |       value: 0.0
 751 |     }
 752 |   }
 753 | }
 754 | layer {
 755 |   name: "conv11/relu"
 756 |   type: "ReLU"
 757 |   bottom: "conv11"
 758 |   top: "conv11"
 759 | }
 760 | layer {
 761 |   name: "conv12/dw"
 762 |   type: "Convolution"
 763 |   bottom: "conv11"
 764 |   top: "conv12/dw"
 765 |   param {
 766 |     lr_mult: 1.0
 767 |     decay_mult: 1.0
 768 |   }
 769 |   param {
 770 |     lr_mult: 2.0
 771 |     decay_mult: 0.0
 772 |   }
 773 |   convolution_param {
 774 |     num_output: 512
 775 |     pad: 1
 776 |     kernel_size: 3
 777 |     stride: 2
 778 |     group: 512
 779 |     engine: CAFFE
 780 |     weight_filler {
 781 |       type: "msra"
 782 |     }
 783 |     bias_filler {
 784 |       type: "constant"
 785 |       value: 0.0
 786 |     }
 787 |   }
 788 | }
 789 | layer {
 790 |   name: "conv12/dw/relu"
 791 |   type: "ReLU"
 792 |   bottom: "conv12/dw"
 793 |   top: "conv12/dw"
 794 | }
 795 | layer {
 796 |   name: "conv12"
 797 |   type: "Convolution"
 798 |   bottom: "conv12/dw"
 799 |   top: "conv12"
 800 |   param {
 801 |     lr_mult: 1.0
 802 |     decay_mult: 1.0
 803 |   }
 804 |   param {
 805 |     lr_mult: 2.0
 806 |     decay_mult: 0.0
 807 |   }
 808 |   convolution_param {
 809 |     num_output: 1024
 810 |     kernel_size: 1
 811 |     weight_filler {
 812 |       type: "msra"
 813 |     }
 814 |     bias_filler {
 815 |       type: "constant"
 816 |       value: 0.0
 817 |     }
 818 |   }
 819 | }
 820 | layer {
 821 |   name: "conv12/relu"
 822 |   type: "ReLU"
 823 |   bottom: "conv12"
 824 |   top: "conv12"
 825 | }
 826 | layer {
 827 |   name: "conv13/dw"
 828 |   type: "Convolution"
 829 |   bottom: "conv12"
 830 |   top: "conv13/dw"
 831 |   param {
 832 |     lr_mult: 1.0
 833 |     decay_mult: 1.0
 834 |   }
 835 |   param {
 836 |     lr_mult: 2.0
 837 |     decay_mult: 0.0
 838 |   }
 839 |   convolution_param {
 840 |     num_output: 1024
 841 |     pad: 1
 842 |     kernel_size: 3
 843 |     group: 1024
 844 |     engine: CAFFE
 845 |     weight_filler {
 846 |       type: "msra"
 847 |     }
 848 |     bias_filler {
 849 |       type: "constant"
 850 |       value: 0.0
 851 |     }
 852 |   }
 853 | }
 854 | layer {
 855 |   name: "conv13/dw/relu"
 856 |   type: "ReLU"
 857 |   bottom: "conv13/dw"
 858 |   top: "conv13/dw"
 859 | }
 860 | layer {
 861 |   name: "conv13"
 862 |   type: "Convolution"
 863 |   bottom: "conv13/dw"
 864 |   top: "conv13"
 865 |   param {
 866 |     lr_mult: 1.0
 867 |     decay_mult: 1.0
 868 |   }
 869 |   param {
 870 |     lr_mult: 2.0
 871 |     decay_mult: 0.0
 872 |   }
 873 |   convolution_param {
 874 |     num_output: 1024
 875 |     kernel_size: 1
 876 |     weight_filler {
 877 |       type: "msra"
 878 |     }
 879 |     bias_filler {
 880 |       type: "constant"
 881 |       value: 0.0
 882 |     }
 883 |   }
 884 | }
 885 | layer {
 886 |   name: "conv13/relu"
 887 |   type: "ReLU"
 888 |   bottom: "conv13"
 889 |   top: "conv13"
 890 | }
 891 | layer {
 892 |   name: "conv14_1"
 893 |   type: "Convolution"
 894 |   bottom: "conv13"
 895 |   top: "conv14_1"
 896 |   param {
 897 |     lr_mult: 1.0
 898 |     decay_mult: 1.0
 899 |   }
 900 |   param {
 901 |     lr_mult: 2.0
 902 |     decay_mult: 0.0
 903 |   }
 904 |   convolution_param {
 905 |     num_output: 256
 906 |     kernel_size: 1
 907 |     weight_filler {
 908 |       type: "msra"
 909 |     }
 910 |     bias_filler {
 911 |       type: "constant"
 912 |       value: 0.0
 913 |     }
 914 |   }
 915 | }
 916 | layer {
 917 |   name: "conv14_1/relu"
 918 |   type: "ReLU"
 919 |   bottom: "conv14_1"
 920 |   top: "conv14_1"
 921 | }
 922 | layer {
 923 |   name: "conv14_2"
 924 |   type: "Convolution"
 925 |   bottom: "conv14_1"
 926 |   top: "conv14_2"
 927 |   param {
 928 |     lr_mult: 1.0
 929 |     decay_mult: 1.0
 930 |   }
 931 |   param {
 932 |     lr_mult: 2.0
 933 |     decay_mult: 0.0
 934 |   }
 935 |   convolution_param {
 936 |     num_output: 512
 937 |     pad: 1
 938 |     kernel_size: 3
 939 |     stride: 2
 940 |     weight_filler {
 941 |       type: "msra"
 942 |     }
 943 |     bias_filler {
 944 |       type: "constant"
 945 |       value: 0.0
 946 |     }
 947 |   }
 948 | }
 949 | layer {
 950 |   name: "conv14_2/relu"
 951 |   type: "ReLU"
 952 |   bottom: "conv14_2"
 953 |   top: "conv14_2"
 954 | }
 955 | layer {
 956 |   name: "conv15_1"
 957 |   type: "Convolution"
 958 |   bottom: "conv14_2"
 959 |   top: "conv15_1"
 960 |   param {
 961 |     lr_mult: 1.0
 962 |     decay_mult: 1.0
 963 |   }
 964 |   param {
 965 |     lr_mult: 2.0
 966 |     decay_mult: 0.0
 967 |   }
 968 |   convolution_param {
 969 |     num_output: 128
 970 |     kernel_size: 1
 971 |     weight_filler {
 972 |       type: "msra"
 973 |     }
 974 |     bias_filler {
 975 |       type: "constant"
 976 |       value: 0.0
 977 |     }
 978 |   }
 979 | }
 980 | layer {
 981 |   name: "conv15_1/relu"
 982 |   type: "ReLU"
 983 |   bottom: "conv15_1"
 984 |   top: "conv15_1"
 985 | }
 986 | layer {
 987 |   name: "conv15_2"
 988 |   type: "Convolution"
 989 |   bottom: "conv15_1"
 990 |   top: "conv15_2"
 991 |   param {
 992 |     lr_mult: 1.0
 993 |     decay_mult: 1.0
 994 |   }
 995 |   param {
 996 |     lr_mult: 2.0
 997 |     decay_mult: 0.0
 998 |   }
 999 |   convolution_param {
1000 |     num_output: 256
1001 |     pad: 1
1002 |     kernel_size: 3
1003 |     stride: 2
1004 |     weight_filler {
1005 |       type: "msra"
1006 |     }
1007 |     bias_filler {
1008 |       type: "constant"
1009 |       value: 0.0
1010 |     }
1011 |   }
1012 | }
1013 | layer {
1014 |   name: "conv15_2/relu"
1015 |   type: "ReLU"
1016 |   bottom: "conv15_2"
1017 |   top: "conv15_2"
1018 | }
1019 | layer {
1020 |   name: "conv16_1"
1021 |   type: "Convolution"
1022 |   bottom: "conv15_2"
1023 |   top: "conv16_1"
1024 |   param {
1025 |     lr_mult: 1.0
1026 |     decay_mult: 1.0
1027 |   }
1028 |   param {
1029 |     lr_mult: 2.0
1030 |     decay_mult: 0.0
1031 |   }
1032 |   convolution_param {
1033 |     num_output: 128
1034 |     kernel_size: 1
1035 |     weight_filler {
1036 |       type: "msra"
1037 |     }
1038 |     bias_filler {
1039 |       type: "constant"
1040 |       value: 0.0
1041 |     }
1042 |   }
1043 | }
1044 | layer {
1045 |   name: "conv16_1/relu"
1046 |   type: "ReLU"
1047 |   bottom: "conv16_1"
1048 |   top: "conv16_1"
1049 | }
1050 | layer {
1051 |   name: "conv16_2"
1052 |   type: "Convolution"
1053 |   bottom: "conv16_1"
1054 |   top: "conv16_2"
1055 |   param {
1056 |     lr_mult: 1.0
1057 |     decay_mult: 1.0
1058 |   }
1059 |   param {
1060 |     lr_mult: 2.0
1061 |     decay_mult: 0.0
1062 |   }
1063 |   convolution_param {
1064 |     num_output: 256
1065 |     pad: 1
1066 |     kernel_size: 3
1067 |     stride: 2
1068 |     weight_filler {
1069 |       type: "msra"
1070 |     }
1071 |     bias_filler {
1072 |       type: "constant"
1073 |       value: 0.0
1074 |     }
1075 |   }
1076 | }
1077 | layer {
1078 |   name: "conv16_2/relu"
1079 |   type: "ReLU"
1080 |   bottom: "conv16_2"
1081 |   top: "conv16_2"
1082 | }
1083 | layer {
1084 |   name: "conv17_1"
1085 |   type: "Convolution"
1086 |   bottom: "conv16_2"
1087 |   top: "conv17_1"
1088 |   param {
1089 |     lr_mult: 1.0
1090 |     decay_mult: 1.0
1091 |   }
1092 |   param {
1093 |     lr_mult: 2.0
1094 |     decay_mult: 0.0
1095 |   }
1096 |   convolution_param {
1097 |     num_output: 64
1098 |     kernel_size: 1
1099 |     weight_filler {
1100 |       type: "msra"
1101 |     }
1102 |     bias_filler {
1103 |       type: "constant"
1104 |       value: 0.0
1105 |     }
1106 |   }
1107 | }
1108 | layer {
1109 |   name: "conv17_1/relu"
1110 |   type: "ReLU"
1111 |   bottom: "conv17_1"
1112 |   top: "conv17_1"
1113 | }
1114 | layer {
1115 |   name: "conv17_2"
1116 |   type: "Convolution"
1117 |   bottom: "conv17_1"
1118 |   top: "conv17_2"
1119 |   param {
1120 |     lr_mult: 1.0
1121 |     decay_mult: 1.0
1122 |   }
1123 |   param {
1124 |     lr_mult: 2.0
1125 |     decay_mult: 0.0
1126 |   }
1127 |   convolution_param {
1128 |     num_output: 128
1129 |     pad: 1
1130 |     kernel_size: 3
1131 |     stride: 2
1132 |     weight_filler {
1133 |       type: "msra"
1134 |     }
1135 |     bias_filler {
1136 |       type: "constant"
1137 |       value: 0.0
1138 |     }
1139 |   }
1140 | }
1141 | layer {
1142 |   name: "conv17_2/relu"
1143 |   type: "ReLU"
1144 |   bottom: "conv17_2"
1145 |   top: "conv17_2"
1146 | }
1147 | layer {
1148 |   name: "conv11_mbox_loc"
1149 |   type: "Convolution"
1150 |   bottom: "conv11"
1151 |   top: "conv11_mbox_loc"
1152 |   param {
1153 |     lr_mult: 1.0
1154 |     decay_mult: 1.0
1155 |   }
1156 |   param {
1157 |     lr_mult: 2.0
1158 |     decay_mult: 0.0
1159 |   }
1160 |   convolution_param {
1161 |     num_output: 12
1162 |     kernel_size: 1
1163 |     weight_filler {
1164 |       type: "msra"
1165 |     }
1166 |     bias_filler {
1167 |       type: "constant"
1168 |       value: 0.0
1169 |     }
1170 |   }
1171 | }
1172 | layer {
1173 |   name: "conv11_mbox_loc_perm"
1174 |   type: "Permute"
1175 |   bottom: "conv11_mbox_loc"
1176 |   top: "conv11_mbox_loc_perm"
1177 |   permute_param {
1178 |     order: 0
1179 |     order: 2
1180 |     order: 3
1181 |     order: 1
1182 |   }
1183 | }
1184 | layer {
1185 |   name: "conv11_mbox_loc_flat"
1186 |   type: "Flatten"
1187 |   bottom: "conv11_mbox_loc_perm"
1188 |   top: "conv11_mbox_loc_flat"
1189 |   flatten_param {
1190 |     axis: 1
1191 |   }
1192 | }
1193 | layer {
1194 |   name: "conv11_mbox_conf"
1195 |   type: "Convolution"
1196 |   bottom: "conv11"
1197 |   top: "conv11_mbox_conf"
1198 |   param {
1199 |     lr_mult: 1.0
1200 |     decay_mult: 1.0
1201 |   }
1202 |   param {
1203 |     lr_mult: 2.0
1204 |     decay_mult: 0.0
1205 |   }
1206 |   convolution_param {
1207 |     num_output: 18
1208 |     kernel_size: 1
1209 |     weight_filler {
1210 |       type: "msra"
1211 |     }
1212 |     bias_filler {
1213 |       type: "constant"
1214 |       value: 0.0
1215 |     }
1216 |   }
1217 | }
1218 | layer {
1219 |   name: "conv11_mbox_conf_perm"
1220 |   type: "Permute"
1221 |   bottom: "conv11_mbox_conf"
1222 |   top: "conv11_mbox_conf_perm"
1223 |   permute_param {
1224 |     order: 0
1225 |     order: 2
1226 |     order: 3
1227 |     order: 1
1228 |   }
1229 | }
1230 | layer {
1231 |   name: "conv11_mbox_conf_flat"
1232 |   type: "Flatten"
1233 |   bottom: "conv11_mbox_conf_perm"
1234 |   top: "conv11_mbox_conf_flat"
1235 |   flatten_param {
1236 |     axis: 1
1237 |   }
1238 | }
1239 | layer {
1240 |   name: "conv11_mbox_priorbox"
1241 |   type: "PriorBox"
1242 |   bottom: "conv11"
1243 |   bottom: "data"
1244 |   top: "conv11_mbox_priorbox"
1245 |   prior_box_param {
1246 |     min_size: 60.0
1247 |     aspect_ratio: 2.0
1248 |     flip: true
1249 |     clip: false
1250 |     variance: 0.1
1251 |     variance: 0.1
1252 |     variance: 0.2
1253 |     variance: 0.2
1254 |     offset: 0.5
1255 |   }
1256 | }
1257 | layer {
1258 |   name: "conv13_mbox_loc"
1259 |   type: "Convolution"
1260 |   bottom: "conv13"
1261 |   top: "conv13_mbox_loc"
1262 |   param {
1263 |     lr_mult: 1.0
1264 |     decay_mult: 1.0
1265 |   }
1266 |   param {
1267 |     lr_mult: 2.0
1268 |     decay_mult: 0.0
1269 |   }
1270 |   convolution_param {
1271 |     num_output: 24
1272 |     kernel_size: 1
1273 |     weight_filler {
1274 |       type: "msra"
1275 |     }
1276 |     bias_filler {
1277 |       type: "constant"
1278 |       value: 0.0
1279 |     }
1280 |   }
1281 | }
1282 | layer {
1283 |   name: "conv13_mbox_loc_perm"
1284 |   type: "Permute"
1285 |   bottom: "conv13_mbox_loc"
1286 |   top: "conv13_mbox_loc_perm"
1287 |   permute_param {
1288 |     order: 0
1289 |     order: 2
1290 |     order: 3
1291 |     order: 1
1292 |   }
1293 | }
1294 | layer {
1295 |   name: "conv13_mbox_loc_flat"
1296 |   type: "Flatten"
1297 |   bottom: "conv13_mbox_loc_perm"
1298 |   top: "conv13_mbox_loc_flat"
1299 |   flatten_param {
1300 |     axis: 1
1301 |   }
1302 | }
1303 | layer {
1304 |   name: "conv13_mbox_conf"
1305 |   type: "Convolution"
1306 |   bottom: "conv13"
1307 |   top: "conv13_mbox_conf"
1308 |   param {
1309 |     lr_mult: 1.0
1310 |     decay_mult: 1.0
1311 |   }
1312 |   param {
1313 |     lr_mult: 2.0
1314 |     decay_mult: 0.0
1315 |   }
1316 |   convolution_param {
1317 |     num_output: 36
1318 |     kernel_size: 1
1319 |     weight_filler {
1320 |       type: "msra"
1321 |     }
1322 |     bias_filler {
1323 |       type: "constant"
1324 |       value: 0.0
1325 |     }
1326 |   }
1327 | }
1328 | layer {
1329 |   name: "conv13_mbox_conf_perm"
1330 |   type: "Permute"
1331 |   bottom: "conv13_mbox_conf"
1332 |   top: "conv13_mbox_conf_perm"
1333 |   permute_param {
1334 |     order: 0
1335 |     order: 2
1336 |     order: 3
1337 |     order: 1
1338 |   }
1339 | }
1340 | layer {
1341 |   name: "conv13_mbox_conf_flat"
1342 |   type: "Flatten"
1343 |   bottom: "conv13_mbox_conf_perm"
1344 |   top: "conv13_mbox_conf_flat"
1345 |   flatten_param {
1346 |     axis: 1
1347 |   }
1348 | }
1349 | layer {
1350 |   name: "conv13_mbox_priorbox"
1351 |   type: "PriorBox"
1352 |   bottom: "conv13"
1353 |   bottom: "data"
1354 |   top: "conv13_mbox_priorbox"
1355 |   prior_box_param {
1356 |     min_size: 105.0
1357 |     max_size: 150.0
1358 |     aspect_ratio: 2.0
1359 |     aspect_ratio: 3.0
1360 |     flip: true
1361 |     clip: false
1362 |     variance: 0.1
1363 |     variance: 0.1
1364 |     variance: 0.2
1365 |     variance: 0.2
1366 |     offset: 0.5
1367 |   }
1368 | }
1369 | layer {
1370 |   name: "conv14_2_mbox_loc"
1371 |   type: "Convolution"
1372 |   bottom: "conv14_2"
1373 |   top: "conv14_2_mbox_loc"
1374 |   param {
1375 |     lr_mult: 1.0
1376 |     decay_mult: 1.0
1377 |   }
1378 |   param {
1379 |     lr_mult: 2.0
1380 |     decay_mult: 0.0
1381 |   }
1382 |   convolution_param {
1383 |     num_output: 24
1384 |     kernel_size: 1
1385 |     weight_filler {
1386 |       type: "msra"
1387 |     }
1388 |     bias_filler {
1389 |       type: "constant"
1390 |       value: 0.0
1391 |     }
1392 |   }
1393 | }
1394 | layer {
1395 |   name: "conv14_2_mbox_loc_perm"
1396 |   type: "Permute"
1397 |   bottom: "conv14_2_mbox_loc"
1398 |   top: "conv14_2_mbox_loc_perm"
1399 |   permute_param {
1400 |     order: 0
1401 |     order: 2
1402 |     order: 3
1403 |     order: 1
1404 |   }
1405 | }
1406 | layer {
1407 |   name: "conv14_2_mbox_loc_flat"
1408 |   type: "Flatten"
1409 |   bottom: "conv14_2_mbox_loc_perm"
1410 |   top: "conv14_2_mbox_loc_flat"
1411 |   flatten_param {
1412 |     axis: 1
1413 |   }
1414 | }
1415 | layer {
1416 |   name: "conv14_2_mbox_conf"
1417 |   type: "Convolution"
1418 |   bottom: "conv14_2"
1419 |   top: "conv14_2_mbox_conf"
1420 |   param {
1421 |     lr_mult: 1.0
1422 |     decay_mult: 1.0
1423 |   }
1424 |   param {
1425 |     lr_mult: 2.0
1426 |     decay_mult: 0.0
1427 |   }
1428 |   convolution_param {
1429 |     num_output: 36
1430 |     kernel_size: 1
1431 |     weight_filler {
1432 |       type: "msra"
1433 |     }
1434 |     bias_filler {
1435 |       type: "constant"
1436 |       value: 0.0
1437 |     }
1438 |   }
1439 | }
1440 | layer {
1441 |   name: "conv14_2_mbox_conf_perm"
1442 |   type: "Permute"
1443 |   bottom: "conv14_2_mbox_conf"
1444 |   top: "conv14_2_mbox_conf_perm"
1445 |   permute_param {
1446 |     order: 0
1447 |     order: 2
1448 |     order: 3
1449 |     order: 1
1450 |   }
1451 | }
1452 | layer {
1453 |   name: "conv14_2_mbox_conf_flat"
1454 |   type: "Flatten"
1455 |   bottom: "conv14_2_mbox_conf_perm"
1456 |   top: "conv14_2_mbox_conf_flat"
1457 |   flatten_param {
1458 |     axis: 1
1459 |   }
1460 | }
1461 | layer {
1462 |   name: "conv14_2_mbox_priorbox"
1463 |   type: "PriorBox"
1464 |   bottom: "conv14_2"
1465 |   bottom: "data"
1466 |   top: "conv14_2_mbox_priorbox"
1467 |   prior_box_param {
1468 |     min_size: 150.0
1469 |     max_size: 195.0
1470 |     aspect_ratio: 2.0
1471 |     aspect_ratio: 3.0
1472 |     flip: true
1473 |     clip: false
1474 |     variance: 0.1
1475 |     variance: 0.1
1476 |     variance: 0.2
1477 |     variance: 0.2
1478 |     offset: 0.5
1479 |   }
1480 | }
1481 | layer {
1482 |   name: "conv15_2_mbox_loc"
1483 |   type: "Convolution"
1484 |   bottom: "conv15_2"
1485 |   top: "conv15_2_mbox_loc"
1486 |   param {
1487 |     lr_mult: 1.0
1488 |     decay_mult: 1.0
1489 |   }
1490 |   param {
1491 |     lr_mult: 2.0
1492 |     decay_mult: 0.0
1493 |   }
1494 |   convolution_param {
1495 |     num_output: 24
1496 |     kernel_size: 1
1497 |     weight_filler {
1498 |       type: "msra"
1499 |     }
1500 |     bias_filler {
1501 |       type: "constant"
1502 |       value: 0.0
1503 |     }
1504 |   }
1505 | }
1506 | layer {
1507 |   name: "conv15_2_mbox_loc_perm"
1508 |   type: "Permute"
1509 |   bottom: "conv15_2_mbox_loc"
1510 |   top: "conv15_2_mbox_loc_perm"
1511 |   permute_param {
1512 |     order: 0
1513 |     order: 2
1514 |     order: 3
1515 |     order: 1
1516 |   }
1517 | }
1518 | layer {
1519 |   name: "conv15_2_mbox_loc_flat"
1520 |   type: "Flatten"
1521 |   bottom: "conv15_2_mbox_loc_perm"
1522 |   top: "conv15_2_mbox_loc_flat"
1523 |   flatten_param {
1524 |     axis: 1
1525 |   }
1526 | }
1527 | layer {
1528 |   name: "conv15_2_mbox_conf"
1529 |   type: "Convolution"
1530 |   bottom: "conv15_2"
1531 |   top: "conv15_2_mbox_conf"
1532 |   param {
1533 |     lr_mult: 1.0
1534 |     decay_mult: 1.0
1535 |   }
1536 |   param {
1537 |     lr_mult: 2.0
1538 |     decay_mult: 0.0
1539 |   }
1540 |   convolution_param {
1541 |     num_output: 36
1542 |     kernel_size: 1
1543 |     weight_filler {
1544 |       type: "msra"
1545 |     }
1546 |     bias_filler {
1547 |       type: "constant"
1548 |       value: 0.0
1549 |     }
1550 |   }
1551 | }
1552 | layer {
1553 |   name: "conv15_2_mbox_conf_perm"
1554 |   type: "Permute"
1555 |   bottom: "conv15_2_mbox_conf"
1556 |   top: "conv15_2_mbox_conf_perm"
1557 |   permute_param {
1558 |     order: 0
1559 |     order: 2
1560 |     order: 3
1561 |     order: 1
1562 |   }
1563 | }
1564 | layer {
1565 |   name: "conv15_2_mbox_conf_flat"
1566 |   type: "Flatten"
1567 |   bottom: "conv15_2_mbox_conf_perm"
1568 |   top: "conv15_2_mbox_conf_flat"
1569 |   flatten_param {
1570 |     axis: 1
1571 |   }
1572 | }
1573 | layer {
1574 |   name: "conv15_2_mbox_priorbox"
1575 |   type: "PriorBox"
1576 |   bottom: "conv15_2"
1577 |   bottom: "data"
1578 |   top: "conv15_2_mbox_priorbox"
1579 |   prior_box_param {
1580 |     min_size: 195.0
1581 |     max_size: 240.0
1582 |     aspect_ratio: 2.0
1583 |     aspect_ratio: 3.0
1584 |     flip: true
1585 |     clip: false
1586 |     variance: 0.1
1587 |     variance: 0.1
1588 |     variance: 0.2
1589 |     variance: 0.2
1590 |     offset: 0.5
1591 |   }
1592 | }
1593 | layer {
1594 |   name: "conv16_2_mbox_loc"
1595 |   type: "Convolution"
1596 |   bottom: "conv16_2"
1597 |   top: "conv16_2_mbox_loc"
1598 |   param {
1599 |     lr_mult: 1.0
1600 |     decay_mult: 1.0
1601 |   }
1602 |   param {
1603 |     lr_mult: 2.0
1604 |     decay_mult: 0.0
1605 |   }
1606 |   convolution_param {
1607 |     num_output: 24
1608 |     kernel_size: 1
1609 |     weight_filler {
1610 |       type: "msra"
1611 |     }
1612 |     bias_filler {
1613 |       type: "constant"
1614 |       value: 0.0
1615 |     }
1616 |   }
1617 | }
1618 | layer {
1619 |   name: "conv16_2_mbox_loc_perm"
1620 |   type: "Permute"
1621 |   bottom: "conv16_2_mbox_loc"
1622 |   top: "conv16_2_mbox_loc_perm"
1623 |   permute_param {
1624 |     order: 0
1625 |     order: 2
1626 |     order: 3
1627 |     order: 1
1628 |   }
1629 | }
1630 | layer {
1631 |   name: "conv16_2_mbox_loc_flat"
1632 |   type: "Flatten"
1633 |   bottom: "conv16_2_mbox_loc_perm"
1634 |   top: "conv16_2_mbox_loc_flat"
1635 |   flatten_param {
1636 |     axis: 1
1637 |   }
1638 | }
1639 | layer {
1640 |   name: "conv16_2_mbox_conf"
1641 |   type: "Convolution"
1642 |   bottom: "conv16_2"
1643 |   top: "conv16_2_mbox_conf"
1644 |   param {
1645 |     lr_mult: 1.0
1646 |     decay_mult: 1.0
1647 |   }
1648 |   param {
1649 |     lr_mult: 2.0
1650 |     decay_mult: 0.0
1651 |   }
1652 |   convolution_param {
1653 |     num_output: 36
1654 |     kernel_size: 1
1655 |     weight_filler {
1656 |       type: "msra"
1657 |     }
1658 |     bias_filler {
1659 |       type: "constant"
1660 |       value: 0.0
1661 |     }
1662 |   }
1663 | }
1664 | layer {
1665 |   name: "conv16_2_mbox_conf_perm"
1666 |   type: "Permute"
1667 |   bottom: "conv16_2_mbox_conf"
1668 |   top: "conv16_2_mbox_conf_perm"
1669 |   permute_param {
1670 |     order: 0
1671 |     order: 2
1672 |     order: 3
1673 |     order: 1
1674 |   }
1675 | }
1676 | layer {
1677 |   name: "conv16_2_mbox_conf_flat"
1678 |   type: "Flatten"
1679 |   bottom: "conv16_2_mbox_conf_perm"
1680 |   top: "conv16_2_mbox_conf_flat"
1681 |   flatten_param {
1682 |     axis: 1
1683 |   }
1684 | }
1685 | layer {
1686 |   name: "conv16_2_mbox_priorbox"
1687 |   type: "PriorBox"
1688 |   bottom: "conv16_2"
1689 |   bottom: "data"
1690 |   top: "conv16_2_mbox_priorbox"
1691 |   prior_box_param {
1692 |     min_size: 240.0
1693 |     max_size: 285.0
1694 |     aspect_ratio: 2.0
1695 |     aspect_ratio: 3.0
1696 |     flip: true
1697 |     clip: false
1698 |     variance: 0.1
1699 |     variance: 0.1
1700 |     variance: 0.2
1701 |     variance: 0.2
1702 |     offset: 0.5
1703 |   }
1704 | }
1705 | layer {
1706 |   name: "conv17_2_mbox_loc"
1707 |   type: "Convolution"
1708 |   bottom: "conv17_2"
1709 |   top: "conv17_2_mbox_loc"
1710 |   param {
1711 |     lr_mult: 1.0
1712 |     decay_mult: 1.0
1713 |   }
1714 |   param {
1715 |     lr_mult: 2.0
1716 |     decay_mult: 0.0
1717 |   }
1718 |   convolution_param {
1719 |     num_output: 24
1720 |     kernel_size: 1
1721 |     weight_filler {
1722 |       type: "msra"
1723 |     }
1724 |     bias_filler {
1725 |       type: "constant"
1726 |       value: 0.0
1727 |     }
1728 |   }
1729 | }
1730 | layer {
1731 |   name: "conv17_2_mbox_loc_perm"
1732 |   type: "Permute"
1733 |   bottom: "conv17_2_mbox_loc"
1734 |   top: "conv17_2_mbox_loc_perm"
1735 |   permute_param {
1736 |     order: 0
1737 |     order: 2
1738 |     order: 3
1739 |     order: 1
1740 |   }
1741 | }
1742 | layer {
1743 |   name: "conv17_2_mbox_loc_flat"
1744 |   type: "Flatten"
1745 |   bottom: "conv17_2_mbox_loc_perm"
1746 |   top: "conv17_2_mbox_loc_flat"
1747 |   flatten_param {
1748 |     axis: 1
1749 |   }
1750 | }
1751 | layer {
1752 |   name: "conv17_2_mbox_conf"
1753 |   type: "Convolution"
1754 |   bottom: "conv17_2"
1755 |   top: "conv17_2_mbox_conf"
1756 |   param {
1757 |     lr_mult: 1.0
1758 |     decay_mult: 1.0
1759 |   }
1760 |   param {
1761 |     lr_mult: 2.0
1762 |     decay_mult: 0.0
1763 |   }
1764 |   convolution_param {
1765 |     num_output: 36
1766 |     kernel_size: 1
1767 |     weight_filler {
1768 |       type: "msra"
1769 |     }
1770 |     bias_filler {
1771 |       type: "constant"
1772 |       value: 0.0
1773 |     }
1774 |   }
1775 | }
1776 | layer {
1777 |   name: "conv17_2_mbox_conf_perm"
1778 |   type: "Permute"
1779 |   bottom: "conv17_2_mbox_conf"
1780 |   top: "conv17_2_mbox_conf_perm"
1781 |   permute_param {
1782 |     order: 0
1783 |     order: 2
1784 |     order: 3
1785 |     order: 1
1786 |   }
1787 | }
1788 | layer {
1789 |   name: "conv17_2_mbox_conf_flat"
1790 |   type: "Flatten"
1791 |   bottom: "conv17_2_mbox_conf_perm"
1792 |   top: "conv17_2_mbox_conf_flat"
1793 |   flatten_param {
1794 |     axis: 1
1795 |   }
1796 | }
1797 | layer {
1798 |   name: "conv17_2_mbox_priorbox"
1799 |   type: "PriorBox"
1800 |   bottom: "conv17_2"
1801 |   bottom: "data"
1802 |   top: "conv17_2_mbox_priorbox"
1803 |   prior_box_param {
1804 |     min_size: 285.0
1805 |     max_size: 300.0
1806 |     aspect_ratio: 2.0
1807 |     aspect_ratio: 3.0
1808 |     flip: true
1809 |     clip: false
1810 |     variance: 0.1
1811 |     variance: 0.1
1812 |     variance: 0.2
1813 |     variance: 0.2
1814 |     offset: 0.5
1815 |   }
1816 | }
1817 | layer {
1818 |   name: "mbox_loc"
1819 |   type: "Concat"
1820 |   bottom: "conv11_mbox_loc_flat"
1821 |   bottom: "conv13_mbox_loc_flat"
1822 |   bottom: "conv14_2_mbox_loc_flat"
1823 |   bottom: "conv15_2_mbox_loc_flat"
1824 |   bottom: "conv16_2_mbox_loc_flat"
1825 |   bottom: "conv17_2_mbox_loc_flat"
1826 |   top: "mbox_loc"
1827 |   concat_param {
1828 |     axis: 1
1829 |   }
1830 | }
1831 | layer {
1832 |   name: "mbox_conf"
1833 |   type: "Concat"
1834 |   bottom: "conv11_mbox_conf_flat"
1835 |   bottom: "conv13_mbox_conf_flat"
1836 |   bottom: "conv14_2_mbox_conf_flat"
1837 |   bottom: "conv15_2_mbox_conf_flat"
1838 |   bottom: "conv16_2_mbox_conf_flat"
1839 |   bottom: "conv17_2_mbox_conf_flat"
1840 |   top: "mbox_conf"
1841 |   concat_param {
1842 |     axis: 1
1843 |   }
1844 | }
1845 | layer {
1846 |   name: "mbox_priorbox"
1847 |   type: "Concat"
1848 |   bottom: "conv11_mbox_priorbox"
1849 |   bottom: "conv13_mbox_priorbox"
1850 |   bottom: "conv14_2_mbox_priorbox"
1851 |   bottom: "conv15_2_mbox_priorbox"
1852 |   bottom: "conv16_2_mbox_priorbox"
1853 |   bottom: "conv17_2_mbox_priorbox"
1854 |   top: "mbox_priorbox"
1855 |   concat_param {
1856 |     axis: 2
1857 |   }
1858 | }
1859 | layer {
1860 |   name: "mbox_conf_reshape"
1861 |   type: "Reshape"
1862 |   bottom: "mbox_conf"
1863 |   top: "mbox_conf_reshape"
1864 |   reshape_param {
1865 |     shape {
1866 |       dim: 0
1867 |       dim: -1
1868 |       dim: 6
1869 |     }
1870 |   }
1871 | }
1872 | layer {
1873 |   name: "mbox_conf_softmax"
1874 |   type: "Softmax"
1875 |   bottom: "mbox_conf_reshape"
1876 |   top: "mbox_conf_softmax"
1877 |   softmax_param {
1878 |     axis: 2
1879 |   }
1880 | }
1881 | layer {
1882 |   name: "mbox_conf_flatten"
1883 |   type: "Flatten"
1884 |   bottom: "mbox_conf_softmax"
1885 |   top: "mbox_conf_flatten"
1886 |   flatten_param {
1887 |     axis: 1
1888 |   }
1889 | }
1890 | layer {
1891 |   name: "detection_out"
1892 |   type: "DetectionOutput"
1893 |   bottom: "mbox_loc"
1894 |   bottom: "mbox_conf_flatten"
1895 |   bottom: "mbox_priorbox"
1896 |   top: "detection_out"
1897 |   include {
1898 |     phase: TEST
1899 |   }
1900 |   detection_output_param {
1901 |     num_classes: 6
1902 |     share_location: true
1903 |     background_label_id: 0
1904 |     nms_param {
1905 |       nms_threshold: 0.45
1906 |       top_k: 100
1907 |     }
1908 |     code_type: CENTER_SIZE
1909 |     keep_top_k: 100
1910 |     confidence_threshold: 0.25
1911 |   }
1912 | }
1913 | 


--------------------------------------------------------------------------------
/SSD/MobileNet/MobileNetSSD_deploy2.prototxt:
--------------------------------------------------------------------------------
   1 | name: "MobileNet-SSD"
   2 | input: "data"
   3 | input_shape {
   4 |   dim: 1
   5 |   dim: 3
   6 |   dim: 300
   7 |   dim: 300
   8 | }
   9 | layer {
  10 |   name: "conv0"
  11 |   type: "Convolution"
  12 |   bottom: "data"
  13 |   top: "conv0"
  14 |   param {
  15 |     lr_mult: 1.0
  16 |     decay_mult: 1.0
  17 |   }
  18 |   param {
  19 |     lr_mult: 2.0
  20 |     decay_mult: 0.0
  21 |   }
  22 |   convolution_param {
  23 |     num_output: 32
  24 |     pad: 1
  25 |     kernel_size: 3
  26 |     stride: 2
  27 |     weight_filler {
  28 |       type: "msra"
  29 |     }
  30 |     bias_filler {
  31 |       type: "constant"
  32 |       value: 0.0
  33 |     }
  34 |   }
  35 | }
  36 | layer {
  37 |   name: "conv0/relu"
  38 |   type: "ReLU"
  39 |   bottom: "conv0"
  40 |   top: "conv0"
  41 | }
  42 | layer {
  43 |   name: "conv1/dw"
  44 |   type: "Convolution"
  45 |   bottom: "conv0"
  46 |   top: "conv1/dw"
  47 |   param {
  48 |     lr_mult: 1.0
  49 |     decay_mult: 1.0
  50 |   }
  51 |   param {
  52 |     lr_mult: 2.0
  53 |     decay_mult: 0.0
  54 |   }
  55 |   convolution_param {
  56 |     num_output: 32
  57 |     pad: 1
  58 |     kernel_size: 3
  59 |     group: 32
  60 |     engine: CAFFE
  61 |     weight_filler {
  62 |       type: "msra"
  63 |     }
  64 |     bias_filler {
  65 |       type: "constant"
  66 |       value: 0.0
  67 |     }
  68 |   }
  69 | }
  70 | layer {
  71 |   name: "conv1/dw/relu"
  72 |   type: "ReLU"
  73 |   bottom: "conv1/dw"
  74 |   top: "conv1/dw"
  75 | }
  76 | layer {
  77 |   name: "conv1"
  78 |   type: "Convolution"
  79 |   bottom: "conv1/dw"
  80 |   top: "conv1"
  81 |   param {
  82 |     lr_mult: 1.0
  83 |     decay_mult: 1.0
  84 |   }
  85 |   param {
  86 |     lr_mult: 2.0
  87 |     decay_mult: 0.0
  88 |   }
  89 |   convolution_param {
  90 |     num_output: 64
  91 |     kernel_size: 1
  92 |     weight_filler {
  93 |       type: "msra"
  94 |     }
  95 |     bias_filler {
  96 |       type: "constant"
  97 |       value: 0.0
  98 |     }
  99 |   }
 100 | }
 101 | layer {
 102 |   name: "conv1/relu"
 103 |   type: "ReLU"
 104 |   bottom: "conv1"
 105 |   top: "conv1"
 106 | }
 107 | layer {
 108 |   name: "conv2/dw"
 109 |   type: "Convolution"
 110 |   bottom: "conv1"
 111 |   top: "conv2/dw"
 112 |   param {
 113 |     lr_mult: 1.0
 114 |     decay_mult: 1.0
 115 |   }
 116 |   param {
 117 |     lr_mult: 2.0
 118 |     decay_mult: 0.0
 119 |   }
 120 |   convolution_param {
 121 |     num_output: 64
 122 |     pad: 1
 123 |     kernel_size: 3
 124 |     stride: 2
 125 |     group: 64
 126 |     engine: CAFFE
 127 |     weight_filler {
 128 |       type: "msra"
 129 |     }
 130 |     bias_filler {
 131 |       type: "constant"
 132 |       value: 0.0
 133 |     }
 134 |   }
 135 | }
 136 | layer {
 137 |   name: "conv2/dw/relu"
 138 |   type: "ReLU"
 139 |   bottom: "conv2/dw"
 140 |   top: "conv2/dw"
 141 | }
 142 | layer {
 143 |   name: "conv2"
 144 |   type: "Convolution"
 145 |   bottom: "conv2/dw"
 146 |   top: "conv2"
 147 |   param {
 148 |     lr_mult: 1.0
 149 |     decay_mult: 1.0
 150 |   }
 151 |   param {
 152 |     lr_mult: 2.0
 153 |     decay_mult: 0.0
 154 |   }
 155 |   convolution_param {
 156 |     num_output: 128
 157 |     kernel_size: 1
 158 |     weight_filler {
 159 |       type: "msra"
 160 |     }
 161 |     bias_filler {
 162 |       type: "constant"
 163 |       value: 0.0
 164 |     }
 165 |   }
 166 | }
 167 | layer {
 168 |   name: "conv2/relu"
 169 |   type: "ReLU"
 170 |   bottom: "conv2"
 171 |   top: "conv2"
 172 | }
 173 | layer {
 174 |   name: "conv3/dw"
 175 |   type: "Convolution"
 176 |   bottom: "conv2"
 177 |   top: "conv3/dw"
 178 |   param {
 179 |     lr_mult: 1.0
 180 |     decay_mult: 1.0
 181 |   }
 182 |   param {
 183 |     lr_mult: 2.0
 184 |     decay_mult: 0.0
 185 |   }
 186 |   convolution_param {
 187 |     num_output: 128
 188 |     pad: 1
 189 |     kernel_size: 3
 190 |     group: 128
 191 |     engine: CAFFE
 192 |     weight_filler {
 193 |       type: "msra"
 194 |     }
 195 |     bias_filler {
 196 |       type: "constant"
 197 |       value: 0.0
 198 |     }
 199 |   }
 200 | }
 201 | layer {
 202 |   name: "conv3/dw/relu"
 203 |   type: "ReLU"
 204 |   bottom: "conv3/dw"
 205 |   top: "conv3/dw"
 206 | }
 207 | layer {
 208 |   name: "conv3"
 209 |   type: "Convolution"
 210 |   bottom: "conv3/dw"
 211 |   top: "conv3"
 212 |   param {
 213 |     lr_mult: 1.0
 214 |     decay_mult: 1.0
 215 |   }
 216 |   param {
 217 |     lr_mult: 2.0
 218 |     decay_mult: 0.0
 219 |   }
 220 |   convolution_param {
 221 |     num_output: 128
 222 |     kernel_size: 1
 223 |     weight_filler {
 224 |       type: "msra"
 225 |     }
 226 |     bias_filler {
 227 |       type: "constant"
 228 |       value: 0.0
 229 |     }
 230 |   }
 231 | }
 232 | layer {
 233 |   name: "conv3/relu"
 234 |   type: "ReLU"
 235 |   bottom: "conv3"
 236 |   top: "conv3"
 237 | }
 238 | layer {
 239 |   name: "conv4/dw"
 240 |   type: "Convolution"
 241 |   bottom: "conv3"
 242 |   top: "conv4/dw"
 243 |   param {
 244 |     lr_mult: 1.0
 245 |     decay_mult: 1.0
 246 |   }
 247 |   param {
 248 |     lr_mult: 2.0
 249 |     decay_mult: 0.0
 250 |   }
 251 |   convolution_param {
 252 |     num_output: 128
 253 |     pad: 1
 254 |     kernel_size: 3
 255 |     stride: 2
 256 |     group: 128
 257 |     engine: CAFFE
 258 |     weight_filler {
 259 |       type: "msra"
 260 |     }
 261 |     bias_filler {
 262 |       type: "constant"
 263 |       value: 0.0
 264 |     }
 265 |   }
 266 | }
 267 | layer {
 268 |   name: "conv4/dw/relu"
 269 |   type: "ReLU"
 270 |   bottom: "conv4/dw"
 271 |   top: "conv4/dw"
 272 | }
 273 | layer {
 274 |   name: "conv4"
 275 |   type: "Convolution"
 276 |   bottom: "conv4/dw"
 277 |   top: "conv4"
 278 |   param {
 279 |     lr_mult: 1.0
 280 |     decay_mult: 1.0
 281 |   }
 282 |   param {
 283 |     lr_mult: 2.0
 284 |     decay_mult: 0.0
 285 |   }
 286 |   convolution_param {
 287 |     num_output: 256
 288 |     kernel_size: 1
 289 |     weight_filler {
 290 |       type: "msra"
 291 |     }
 292 |     bias_filler {
 293 |       type: "constant"
 294 |       value: 0.0
 295 |     }
 296 |   }
 297 | }
 298 | layer {
 299 |   name: "conv4/relu"
 300 |   type: "ReLU"
 301 |   bottom: "conv4"
 302 |   top: "conv4"
 303 | }
 304 | layer {
 305 |   name: "conv5/dw"
 306 |   type: "Convolution"
 307 |   bottom: "conv4"
 308 |   top: "conv5/dw"
 309 |   param {
 310 |     lr_mult: 1.0
 311 |     decay_mult: 1.0
 312 |   }
 313 |   param {
 314 |     lr_mult: 2.0
 315 |     decay_mult: 0.0
 316 |   }
 317 |   convolution_param {
 318 |     num_output: 256
 319 |     pad: 1
 320 |     kernel_size: 3
 321 |     group: 256
 322 |     engine: CAFFE
 323 |     weight_filler {
 324 |       type: "msra"
 325 |     }
 326 |     bias_filler {
 327 |       type: "constant"
 328 |       value: 0.0
 329 |     }
 330 |   }
 331 | }
 332 | layer {
 333 |   name: "conv5/dw/relu"
 334 |   type: "ReLU"
 335 |   bottom: "conv5/dw"
 336 |   top: "conv5/dw"
 337 | }
 338 | layer {
 339 |   name: "conv5"
 340 |   type: "Convolution"
 341 |   bottom: "conv5/dw"
 342 |   top: "conv5"
 343 |   param {
 344 |     lr_mult: 1.0
 345 |     decay_mult: 1.0
 346 |   }
 347 |   param {
 348 |     lr_mult: 2.0
 349 |     decay_mult: 0.0
 350 |   }
 351 |   convolution_param {
 352 |     num_output: 256
 353 |     kernel_size: 1
 354 |     weight_filler {
 355 |       type: "msra"
 356 |     }
 357 |     bias_filler {
 358 |       type: "constant"
 359 |       value: 0.0
 360 |     }
 361 |   }
 362 | }
 363 | layer {
 364 |   name: "conv5/relu"
 365 |   type: "ReLU"
 366 |   bottom: "conv5"
 367 |   top: "conv5"
 368 | }
 369 | layer {
 370 |   name: "conv6/dw"
 371 |   type: "Convolution"
 372 |   bottom: "conv5"
 373 |   top: "conv6/dw"
 374 |   param {
 375 |     lr_mult: 1.0
 376 |     decay_mult: 1.0
 377 |   }
 378 |   param {
 379 |     lr_mult: 2.0
 380 |     decay_mult: 0.0
 381 |   }
 382 |   convolution_param {
 383 |     num_output: 256
 384 |     pad: 1
 385 |     kernel_size: 3
 386 |     stride: 2
 387 |     group: 256
 388 |     engine: CAFFE
 389 |     weight_filler {
 390 |       type: "msra"
 391 |     }
 392 |     bias_filler {
 393 |       type: "constant"
 394 |       value: 0.0
 395 |     }
 396 |   }
 397 | }
 398 | layer {
 399 |   name: "conv6/dw/relu"
 400 |   type: "ReLU"
 401 |   bottom: "conv6/dw"
 402 |   top: "conv6/dw"
 403 | }
 404 | layer {
 405 |   name: "conv6"
 406 |   type: "Convolution"
 407 |   bottom: "conv6/dw"
 408 |   top: "conv6"
 409 |   param {
 410 |     lr_mult: 1.0
 411 |     decay_mult: 1.0
 412 |   }
 413 |   param {
 414 |     lr_mult: 2.0
 415 |     decay_mult: 0.0
 416 |   }
 417 |   convolution_param {
 418 |     num_output: 512
 419 |     kernel_size: 1
 420 |     weight_filler {
 421 |       type: "msra"
 422 |     }
 423 |     bias_filler {
 424 |       type: "constant"
 425 |       value: 0.0
 426 |     }
 427 |   }
 428 | }
 429 | layer {
 430 |   name: "conv6/relu"
 431 |   type: "ReLU"
 432 |   bottom: "conv6"
 433 |   top: "conv6"
 434 | }
 435 | layer {
 436 |   name: "conv7/dw"
 437 |   type: "Convolution"
 438 |   bottom: "conv6"
 439 |   top: "conv7/dw"
 440 |   param {
 441 |     lr_mult: 1.0
 442 |     decay_mult: 1.0
 443 |   }
 444 |   param {
 445 |     lr_mult: 2.0
 446 |     decay_mult: 0.0
 447 |   }
 448 |   convolution_param {
 449 |     num_output: 512
 450 |     pad: 1
 451 |     kernel_size: 3
 452 |     group: 512
 453 |     engine: CAFFE
 454 |     weight_filler {
 455 |       type: "msra"
 456 |     }
 457 |     bias_filler {
 458 |       type: "constant"
 459 |       value: 0.0
 460 |     }
 461 |   }
 462 | }
 463 | layer {
 464 |   name: "conv7/dw/relu"
 465 |   type: "ReLU"
 466 |   bottom: "conv7/dw"
 467 |   top: "conv7/dw"
 468 | }
 469 | layer {
 470 |   name: "conv7"
 471 |   type: "Convolution"
 472 |   bottom: "conv7/dw"
 473 |   top: "conv7"
 474 |   param {
 475 |     lr_mult: 1.0
 476 |     decay_mult: 1.0
 477 |   }
 478 |   param {
 479 |     lr_mult: 2.0
 480 |     decay_mult: 0.0
 481 |   }
 482 |   convolution_param {
 483 |     num_output: 512
 484 |     kernel_size: 1
 485 |     weight_filler {
 486 |       type: "msra"
 487 |     }
 488 |     bias_filler {
 489 |       type: "constant"
 490 |       value: 0.0
 491 |     }
 492 |   }
 493 | }
 494 | layer {
 495 |   name: "conv7/relu"
 496 |   type: "ReLU"
 497 |   bottom: "conv7"
 498 |   top: "conv7"
 499 | }
 500 | layer {  # conv8 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU
 501 |   name: "conv8/dw"
 502 |   type: "Convolution"
 503 |   bottom: "conv7"
 504 |   top: "conv8/dw"
 505 |   param {  # weight multipliers (used by the solver during training)
 506 |     lr_mult: 1.0
 507 |     decay_mult: 1.0
 508 |   }
 509 |   param {  # bias multipliers: 2x learning rate, no weight decay
 510 |     lr_mult: 2.0
 511 |     decay_mult: 0.0
 512 |   }
 513 |   convolution_param {
 514 |     num_output: 512
 515 |     pad: 1  # pad 1 with a 3x3 kernel and default stride keeps the spatial size
 516 |     kernel_size: 3
 517 |     group: 512  # group == channel count of "conv7" (512): each channel gets its own 3x3 filter (depthwise)
 518 |     engine: CAFFE  # use Caffe's native convolution implementation instead of the default engine
 519 |     weight_filler {
 520 |       type: "msra"
 521 |     }
 522 |     bias_filler {
 523 |       type: "constant"
 524 |       value: 0.0
 525 |     }
 526 |   }
 527 | }
 528 | layer {
 529 |   name: "conv8/dw/relu"
 530 |   type: "ReLU"
 531 |   bottom: "conv8/dw"
 532 |   top: "conv8/dw"  # in-place activation
 533 | }
 534 | layer {
 535 |   name: "conv8"
 536 |   type: "Convolution"
 537 |   bottom: "conv8/dw"
 538 |   top: "conv8"
 539 |   param {
 540 |     lr_mult: 1.0
 541 |     decay_mult: 1.0
 542 |   }
 543 |   param {
 544 |     lr_mult: 2.0
 545 |     decay_mult: 0.0
 546 |   }
 547 |   convolution_param {
 548 |     num_output: 512
 549 |     kernel_size: 1  # 1x1 pointwise conv: recombines the depthwise outputs across channels
 550 |     weight_filler {
 551 |       type: "msra"
 552 |     }
 553 |     bias_filler {
 554 |       type: "constant"
 555 |       value: 0.0
 556 |     }
 557 |   }
 558 | }
 559 | layer {
 560 |   name: "conv8/relu"
 561 |   type: "ReLU"
 562 |   bottom: "conv8"
 563 |   top: "conv8"  # in-place activation
 564 | }
 565 | layer {  # conv9 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU
 566 |   name: "conv9/dw"
 567 |   type: "Convolution"
 568 |   bottom: "conv8"
 569 |   top: "conv9/dw"
 570 |   param {  # weight multipliers (used by the solver during training)
 571 |     lr_mult: 1.0
 572 |     decay_mult: 1.0
 573 |   }
 574 |   param {  # bias multipliers: 2x learning rate, no weight decay
 575 |     lr_mult: 2.0
 576 |     decay_mult: 0.0
 577 |   }
 578 |   convolution_param {
 579 |     num_output: 512
 580 |     pad: 1  # keeps the spatial size for a 3x3 kernel at the default stride
 581 |     kernel_size: 3
 582 |     group: 512  # one group per input channel of "conv8" (512): depthwise convolution
 583 |     engine: CAFFE  # use Caffe's native convolution implementation
 584 |     weight_filler {
 585 |       type: "msra"
 586 |     }
 587 |     bias_filler {
 588 |       type: "constant"
 589 |       value: 0.0
 590 |     }
 591 |   }
 592 | }
 593 | layer {
 594 |   name: "conv9/dw/relu"
 595 |   type: "ReLU"
 596 |   bottom: "conv9/dw"
 597 |   top: "conv9/dw"  # in-place activation
 598 | }
 599 | layer {
 600 |   name: "conv9"
 601 |   type: "Convolution"
 602 |   bottom: "conv9/dw"
 603 |   top: "conv9"
 604 |   param {
 605 |     lr_mult: 1.0
 606 |     decay_mult: 1.0
 607 |   }
 608 |   param {
 609 |     lr_mult: 2.0
 610 |     decay_mult: 0.0
 611 |   }
 612 |   convolution_param {
 613 |     num_output: 512
 614 |     kernel_size: 1  # 1x1 pointwise conv across channels
 615 |     weight_filler {
 616 |       type: "msra"
 617 |     }
 618 |     bias_filler {
 619 |       type: "constant"
 620 |       value: 0.0
 621 |     }
 622 |   }
 623 | }
 624 | layer {
 625 |   name: "conv9/relu"
 626 |   type: "ReLU"
 627 |   bottom: "conv9"
 628 |   top: "conv9"  # in-place activation
 629 | }
 630 | layer {  # conv10 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU
 631 |   name: "conv10/dw"
 632 |   type: "Convolution"
 633 |   bottom: "conv9"
 634 |   top: "conv10/dw"
 635 |   param {  # weight multipliers (used by the solver during training)
 636 |     lr_mult: 1.0
 637 |     decay_mult: 1.0
 638 |   }
 639 |   param {  # bias multipliers: 2x learning rate, no weight decay
 640 |     lr_mult: 2.0
 641 |     decay_mult: 0.0
 642 |   }
 643 |   convolution_param {
 644 |     num_output: 512
 645 |     pad: 1  # keeps the spatial size for a 3x3 kernel at the default stride
 646 |     kernel_size: 3
 647 |     group: 512  # one group per input channel of "conv9" (512): depthwise convolution
 648 |     engine: CAFFE  # use Caffe's native convolution implementation
 649 |     weight_filler {
 650 |       type: "msra"
 651 |     }
 652 |     bias_filler {
 653 |       type: "constant"
 654 |       value: 0.0
 655 |     }
 656 |   }
 657 | }
 658 | layer {
 659 |   name: "conv10/dw/relu"
 660 |   type: "ReLU"
 661 |   bottom: "conv10/dw"
 662 |   top: "conv10/dw"  # in-place activation
 663 | }
 664 | layer {
 665 |   name: "conv10"
 666 |   type: "Convolution"
 667 |   bottom: "conv10/dw"
 668 |   top: "conv10"
 669 |   param {
 670 |     lr_mult: 1.0
 671 |     decay_mult: 1.0
 672 |   }
 673 |   param {
 674 |     lr_mult: 2.0
 675 |     decay_mult: 0.0
 676 |   }
 677 |   convolution_param {
 678 |     num_output: 512
 679 |     kernel_size: 1  # 1x1 pointwise conv across channels
 680 |     weight_filler {
 681 |       type: "msra"
 682 |     }
 683 |     bias_filler {
 684 |       type: "constant"
 685 |       value: 0.0
 686 |     }
 687 |   }
 688 | }
 689 | layer {
 690 |   name: "conv10/relu"
 691 |   type: "ReLU"
 692 |   bottom: "conv10"
 693 |   top: "conv10"  # in-place activation
 694 | }
 695 | layer {  # conv11 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU
 696 |   name: "conv11/dw"
 697 |   type: "Convolution"
 698 |   bottom: "conv10"
 699 |   top: "conv11/dw"
 700 |   param {  # weight multipliers (used by the solver during training)
 701 |     lr_mult: 1.0
 702 |     decay_mult: 1.0
 703 |   }
 704 |   param {  # bias multipliers: 2x learning rate, no weight decay
 705 |     lr_mult: 2.0
 706 |     decay_mult: 0.0
 707 |   }
 708 |   convolution_param {
 709 |     num_output: 512
 710 |     pad: 1  # keeps the spatial size for a 3x3 kernel at the default stride
 711 |     kernel_size: 3
 712 |     group: 512  # one group per input channel of "conv10" (512): depthwise convolution
 713 |     engine: CAFFE  # use Caffe's native convolution implementation
 714 |     weight_filler {
 715 |       type: "msra"
 716 |     }
 717 |     bias_filler {
 718 |       type: "constant"
 719 |       value: 0.0
 720 |     }
 721 |   }
 722 | }
 723 | layer {
 724 |   name: "conv11/dw/relu"
 725 |   type: "ReLU"
 726 |   bottom: "conv11/dw"
 727 |   top: "conv11/dw"  # in-place activation
 728 | }
 729 | layer {
 730 |   name: "conv11"
 731 |   type: "Convolution"
 732 |   bottom: "conv11/dw"
 733 |   top: "conv11"  # this blob also feeds the conv11_mbox_* detection head and conv12/dw
 734 |   param {
 735 |     lr_mult: 1.0
 736 |     decay_mult: 1.0
 737 |   }
 738 |   param {
 739 |     lr_mult: 2.0
 740 |     decay_mult: 0.0
 741 |   }
 742 |   convolution_param {
 743 |     num_output: 512
 744 |     kernel_size: 1  # 1x1 pointwise conv across channels
 745 |     weight_filler {
 746 |       type: "msra"
 747 |     }
 748 |     bias_filler {
 749 |       type: "constant"
 750 |       value: 0.0
 751 |     }
 752 |   }
 753 | }
 754 | layer {
 755 |   name: "conv11/relu"
 756 |   type: "ReLU"
 757 |   bottom: "conv11"
 758 |   top: "conv11"  # in-place activation
 759 | }
 760 | layer {  # conv12 unit: stride-2 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv (512 -> 1024) -> ReLU
 761 |   name: "conv12/dw"
 762 |   type: "Convolution"
 763 |   bottom: "conv11"
 764 |   top: "conv12/dw"
 765 |   param {  # weight multipliers (used by the solver during training)
 766 |     lr_mult: 1.0
 767 |     decay_mult: 1.0
 768 |   }
 769 |   param {  # bias multipliers: 2x learning rate, no weight decay
 770 |     lr_mult: 2.0
 771 |     decay_mult: 0.0
 772 |   }
 773 |   convolution_param {
 774 |     num_output: 512
 775 |     pad: 1
 776 |     kernel_size: 3
 777 |     stride: 2  # downsamples: roughly halves the spatial height and width
 778 |     group: 512  # one group per input channel of "conv11" (512): depthwise convolution
 779 |     engine: CAFFE  # use Caffe's native convolution implementation
 780 |     weight_filler {
 781 |       type: "msra"
 782 |     }
 783 |     bias_filler {
 784 |       type: "constant"
 785 |       value: 0.0
 786 |     }
 787 |   }
 788 | }
 789 | layer {
 790 |   name: "conv12/dw/relu"
 791 |   type: "ReLU"
 792 |   bottom: "conv12/dw"
 793 |   top: "conv12/dw"  # in-place activation
 794 | }
 795 | layer {
 796 |   name: "conv12"
 797 |   type: "Convolution"
 798 |   bottom: "conv12/dw"
 799 |   top: "conv12"
 800 |   param {
 801 |     lr_mult: 1.0
 802 |     decay_mult: 1.0
 803 |   }
 804 |   param {
 805 |     lr_mult: 2.0
 806 |     decay_mult: 0.0
 807 |   }
 808 |   convolution_param {
 809 |     num_output: 1024  # pointwise conv doubles the channel count (512 -> 1024)
 810 |     kernel_size: 1
 811 |     weight_filler {
 812 |       type: "msra"
 813 |     }
 814 |     bias_filler {
 815 |       type: "constant"
 816 |       value: 0.0
 817 |     }
 818 |   }
 819 | }
 820 | layer {
 821 |   name: "conv12/relu"
 822 |   type: "ReLU"
 823 |   bottom: "conv12"
 824 |   top: "conv12"  # in-place activation
 825 | }
 826 | layer {  # conv13 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU (1024 channels)
 827 |   name: "conv13/dw"
 828 |   type: "Convolution"
 829 |   bottom: "conv12"
 830 |   top: "conv13/dw"
 831 |   param {  # weight multipliers (used by the solver during training)
 832 |     lr_mult: 1.0
 833 |     decay_mult: 1.0
 834 |   }
 835 |   param {  # bias multipliers: 2x learning rate, no weight decay
 836 |     lr_mult: 2.0
 837 |     decay_mult: 0.0
 838 |   }
 839 |   convolution_param {
 840 |     num_output: 1024
 841 |     pad: 1  # keeps the spatial size for a 3x3 kernel at the default stride
 842 |     kernel_size: 3
 843 |     group: 1024  # one group per input channel of "conv12" (1024): depthwise convolution
 844 |     engine: CAFFE  # use Caffe's native convolution implementation
 845 |     weight_filler {
 846 |       type: "msra"
 847 |     }
 848 |     bias_filler {
 849 |       type: "constant"
 850 |       value: 0.0
 851 |     }
 852 |   }
 853 | }
 854 | layer {
 855 |   name: "conv13/dw/relu"
 856 |   type: "ReLU"
 857 |   bottom: "conv13/dw"
 858 |   top: "conv13/dw"  # in-place activation
 859 | }
 860 | layer {
 861 |   name: "conv13"
 862 |   type: "Convolution"
 863 |   bottom: "conv13/dw"
 864 |   top: "conv13"  # this blob also feeds the conv13_mbox_* detection head and conv14_1
 865 |   param {
 866 |     lr_mult: 1.0
 867 |     decay_mult: 1.0
 868 |   }
 869 |   param {
 870 |     lr_mult: 2.0
 871 |     decay_mult: 0.0
 872 |   }
 873 |   convolution_param {
 874 |     num_output: 1024
 875 |     kernel_size: 1  # 1x1 pointwise conv across channels
 876 |     weight_filler {
 877 |       type: "msra"
 878 |     }
 879 |     bias_filler {
 880 |       type: "constant"
 881 |       value: 0.0
 882 |     }
 883 |   }
 884 | }
 885 | layer {
 886 |   name: "conv13/relu"
 887 |   type: "ReLU"
 888 |   bottom: "conv13"
 889 |   top: "conv13"  # in-place activation
 890 | }
 891 | layer {  # conv14 unit: 1x1 channel reduction (1024 -> 256) -> ReLU -> stride-2 3x3 conv (256 -> 512) -> ReLU
 892 |   name: "conv14_1"
 893 |   type: "Convolution"
 894 |   bottom: "conv13"
 895 |   top: "conv14_1"
 896 |   param {  # weight multipliers (used by the solver during training)
 897 |     lr_mult: 1.0
 898 |     decay_mult: 1.0
 899 |   }
 900 |   param {  # bias multipliers: 2x learning rate, no weight decay
 901 |     lr_mult: 2.0
 902 |     decay_mult: 0.0
 903 |   }
 904 |   convolution_param {
 905 |     num_output: 256  # bottleneck: fewer channels before the 3x3 conv that follows
 906 |     kernel_size: 1
 907 |     weight_filler {
 908 |       type: "msra"
 909 |     }
 910 |     bias_filler {
 911 |       type: "constant"
 912 |       value: 0.0
 913 |     }
 914 |   }
 915 | }
 916 | layer {
 917 |   name: "conv14_1/relu"
 918 |   type: "ReLU"
 919 |   bottom: "conv14_1"
 920 |   top: "conv14_1"  # in-place activation
 921 | }
 922 | layer {
 923 |   name: "conv14_2"
 924 |   type: "Convolution"
 925 |   bottom: "conv14_1"
 926 |   top: "conv14_2"  # this blob also feeds the conv14_2_mbox_* detection head
 927 |   param {
 928 |     lr_mult: 1.0
 929 |     decay_mult: 1.0
 930 |   }
 931 |   param {
 932 |     lr_mult: 2.0
 933 |     decay_mult: 0.0
 934 |   }
 935 |   convolution_param {
 936 |     num_output: 512
 937 |     pad: 1
 938 |     kernel_size: 3  # no "group" field here: a regular (non-depthwise) 3x3 convolution
 939 |     stride: 2  # downsamples: roughly halves the spatial height and width
 940 |     weight_filler {
 941 |       type: "msra"
 942 |     }
 943 |     bias_filler {
 944 |       type: "constant"
 945 |       value: 0.0
 946 |     }
 947 |   }
 948 | }
 949 | layer {
 950 |   name: "conv14_2/relu"
 951 |   type: "ReLU"
 952 |   bottom: "conv14_2"
 953 |   top: "conv14_2"  # in-place activation
 954 | }
 955 | layer {  # conv15 unit: 1x1 channel reduction (512 -> 128) -> ReLU -> stride-2 3x3 conv (128 -> 256) -> ReLU
 956 |   name: "conv15_1"
 957 |   type: "Convolution"
 958 |   bottom: "conv14_2"
 959 |   top: "conv15_1"
 960 |   param {  # weight multipliers (used by the solver during training)
 961 |     lr_mult: 1.0
 962 |     decay_mult: 1.0
 963 |   }
 964 |   param {  # bias multipliers: 2x learning rate, no weight decay
 965 |     lr_mult: 2.0
 966 |     decay_mult: 0.0
 967 |   }
 968 |   convolution_param {
 969 |     num_output: 128  # bottleneck: fewer channels before the 3x3 conv that follows
 970 |     kernel_size: 1
 971 |     weight_filler {
 972 |       type: "msra"
 973 |     }
 974 |     bias_filler {
 975 |       type: "constant"
 976 |       value: 0.0
 977 |     }
 978 |   }
 979 | }
 980 | layer {
 981 |   name: "conv15_1/relu"
 982 |   type: "ReLU"
 983 |   bottom: "conv15_1"
 984 |   top: "conv15_1"  # in-place activation
 985 | }
 986 | layer {
 987 |   name: "conv15_2"
 988 |   type: "Convolution"
 989 |   bottom: "conv15_1"
 990 |   top: "conv15_2"  # this blob also feeds the conv15_2_mbox_* detection head
 991 |   param {
 992 |     lr_mult: 1.0
 993 |     decay_mult: 1.0
 994 |   }
 995 |   param {
 996 |     lr_mult: 2.0
 997 |     decay_mult: 0.0
 998 |   }
 999 |   convolution_param {
1000 |     num_output: 256
1001 |     pad: 1
1002 |     kernel_size: 3  # no "group" field here: a regular (non-depthwise) 3x3 convolution
1003 |     stride: 2  # downsamples: roughly halves the spatial height and width
1004 |     weight_filler {
1005 |       type: "msra"
1006 |     }
1007 |     bias_filler {
1008 |       type: "constant"
1009 |       value: 0.0
1010 |     }
1011 |   }
1012 | }
1013 | layer {
1014 |   name: "conv15_2/relu"
1015 |   type: "ReLU"
1016 |   bottom: "conv15_2"
1017 |   top: "conv15_2"  # in-place activation
1018 | }
1019 | layer {  # conv16 unit: 1x1 channel reduction (256 -> 128) -> ReLU -> stride-2 3x3 conv (128 -> 256) -> ReLU
1020 |   name: "conv16_1"
1021 |   type: "Convolution"
1022 |   bottom: "conv15_2"
1023 |   top: "conv16_1"
1024 |   param {  # weight multipliers (used by the solver during training)
1025 |     lr_mult: 1.0
1026 |     decay_mult: 1.0
1027 |   }
1028 |   param {  # bias multipliers: 2x learning rate, no weight decay
1029 |     lr_mult: 2.0
1030 |     decay_mult: 0.0
1031 |   }
1032 |   convolution_param {
1033 |     num_output: 128  # bottleneck: fewer channels before the 3x3 conv that follows
1034 |     kernel_size: 1
1035 |     weight_filler {
1036 |       type: "msra"
1037 |     }
1038 |     bias_filler {
1039 |       type: "constant"
1040 |       value: 0.0
1041 |     }
1042 |   }
1043 | }
1044 | layer {
1045 |   name: "conv16_1/relu"
1046 |   type: "ReLU"
1047 |   bottom: "conv16_1"
1048 |   top: "conv16_1"  # in-place activation
1049 | }
1050 | layer {
1051 |   name: "conv16_2"
1052 |   type: "Convolution"
1053 |   bottom: "conv16_1"
1054 |   top: "conv16_2"  # this blob also feeds the conv16_2_mbox_* detection head
1055 |   param {
1056 |     lr_mult: 1.0
1057 |     decay_mult: 1.0
1058 |   }
1059 |   param {
1060 |     lr_mult: 2.0
1061 |     decay_mult: 0.0
1062 |   }
1063 |   convolution_param {
1064 |     num_output: 256
1065 |     pad: 1
1066 |     kernel_size: 3  # no "group" field here: a regular (non-depthwise) 3x3 convolution
1067 |     stride: 2  # downsamples: roughly halves the spatial height and width
1068 |     weight_filler {
1069 |       type: "msra"
1070 |     }
1071 |     bias_filler {
1072 |       type: "constant"
1073 |       value: 0.0
1074 |     }
1075 |   }
1076 | }
1077 | layer {
1078 |   name: "conv16_2/relu"
1079 |   type: "ReLU"
1080 |   bottom: "conv16_2"
1081 |   top: "conv16_2"  # in-place activation
1082 | }
1083 | layer {  # conv17 unit: 1x1 channel reduction (256 -> 64) -> ReLU -> stride-2 3x3 conv (64 -> 128) -> ReLU
1084 |   name: "conv17_1"
1085 |   type: "Convolution"
1086 |   bottom: "conv16_2"
1087 |   top: "conv17_1"
1088 |   param {  # weight multipliers (used by the solver during training)
1089 |     lr_mult: 1.0
1090 |     decay_mult: 1.0
1091 |   }
1092 |   param {  # bias multipliers: 2x learning rate, no weight decay
1093 |     lr_mult: 2.0
1094 |     decay_mult: 0.0
1095 |   }
1096 |   convolution_param {
1097 |     num_output: 64  # bottleneck: fewer channels before the 3x3 conv that follows
1098 |     kernel_size: 1
1099 |     weight_filler {
1100 |       type: "msra"
1101 |     }
1102 |     bias_filler {
1103 |       type: "constant"
1104 |       value: 0.0
1105 |     }
1106 |   }
1107 | }
1108 | layer {
1109 |   name: "conv17_1/relu"
1110 |   type: "ReLU"
1111 |   bottom: "conv17_1"
1112 |   top: "conv17_1"  # in-place activation
1113 | }
1114 | layer {
1115 |   name: "conv17_2"
1116 |   type: "Convolution"
1117 |   bottom: "conv17_1"
1118 |   top: "conv17_2"  # last feature map; feeds the conv17_2_mbox_* detection head
1119 |   param {
1120 |     lr_mult: 1.0
1121 |     decay_mult: 1.0
1122 |   }
1123 |   param {
1124 |     lr_mult: 2.0
1125 |     decay_mult: 0.0
1126 |   }
1127 |   convolution_param {
1128 |     num_output: 128
1129 |     pad: 1
1130 |     kernel_size: 3  # no "group" field here: a regular (non-depthwise) 3x3 convolution
1131 |     stride: 2  # downsamples: roughly halves the spatial height and width
1132 |     weight_filler {
1133 |       type: "msra"
1134 |     }
1135 |     bias_filler {
1136 |       type: "constant"
1137 |       value: 0.0
1138 |     }
1139 |   }
1140 | }
1141 | layer {
1142 |   name: "conv17_2/relu"
1143 |   type: "ReLU"
1144 |   bottom: "conv17_2"
1145 |   top: "conv17_2"  # in-place activation
1146 | }
1147 | layer {  # conv11 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv11" feature map
1148 |   name: "conv11_mbox_loc"
1149 |   type: "Convolution"
1150 |   bottom: "conv11"
1151 |   top: "conv11_mbox_loc"
1152 |   param {  # weight multipliers (used by the solver during training)
1153 |     lr_mult: 1.0
1154 |     decay_mult: 1.0
1155 |   }
1156 |   param {  # bias multipliers: 2x learning rate, no weight decay
1157 |     lr_mult: 2.0
1158 |     decay_mult: 0.0
1159 |   }
1160 |   convolution_param {
1161 |     num_output: 12  # 3 priors per cell x 4 box-regression values (see conv11_mbox_priorbox for the 3 priors)
1162 |     kernel_size: 1
1163 |     weight_filler {
1164 |       type: "msra"
1165 |     }
1166 |     bias_filler {
1167 |       type: "constant"
1168 |       value: 0.0
1169 |     }
1170 |   }
1171 | }
1172 | layer {
1173 |   name: "conv11_mbox_loc_perm"
1174 |   type: "Permute"
1175 |   bottom: "conv11_mbox_loc"
1176 |   top: "conv11_mbox_loc_perm"
1177 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1178 |     order: 0
1179 |     order: 2
1180 |     order: 3
1181 |     order: 1
1182 |   }
1183 | }
1184 | layer {
1185 |   name: "conv11_mbox_loc_flat"
1186 |   type: "Flatten"
1187 |   bottom: "conv11_mbox_loc_perm"
1188 |   top: "conv11_mbox_loc_flat"
1189 |   flatten_param {
1190 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1191 |   }
1192 | }
1193 | layer {
1194 |   name: "conv11_mbox_conf"
1195 |   type: "Convolution"
1196 |   bottom: "conv11"
1197 |   top: "conv11_mbox_conf"
1198 |   param {
1199 |     lr_mult: 1.0
1200 |     decay_mult: 1.0
1201 |   }
1202 |   param {
1203 |     lr_mult: 2.0
1204 |     decay_mult: 0.0
1205 |   }
1206 |   convolution_param {
1207 |     num_output: 63  # 3 priors per cell x 21 class scores (21 = num_classes of detection_out)
1208 |     kernel_size: 1
1209 |     weight_filler {
1210 |       type: "msra"
1211 |     }
1212 |     bias_filler {
1213 |       type: "constant"
1214 |       value: 0.0
1215 |     }
1216 |   }
1217 | }
1218 | layer {
1219 |   name: "conv11_mbox_conf_perm"
1220 |   type: "Permute"
1221 |   bottom: "conv11_mbox_conf"
1222 |   top: "conv11_mbox_conf_perm"
1223 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1224 |     order: 0
1225 |     order: 2
1226 |     order: 3
1227 |     order: 1
1228 |   }
1229 | }
1230 | layer {
1231 |   name: "conv11_mbox_conf_flat"
1232 |   type: "Flatten"
1233 |   bottom: "conv11_mbox_conf_perm"
1234 |   top: "conv11_mbox_conf_flat"
1235 |   flatten_param {
1236 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1237 |   }
1238 | }
1239 | layer {
1240 |   name: "conv11_mbox_priorbox"
1241 |   type: "PriorBox"
1242 |   bottom: "conv11"  # feature map that defines the grid of prior-box centers
1243 |   bottom: "data"  # network input, used for the image size
1244 |   top: "conv11_mbox_priorbox"
1245 |   prior_box_param {
1246 |     min_size: 60.0  # no max_size on this head, so only the min_size square plus the aspect-ratio boxes are generated
1247 |     aspect_ratio: 2.0
1248 |     flip: true  # also generate the reciprocal ratio (1/2) -> 3 priors per cell in total
1249 |     clip: false  # priors are not clamped to the image bounds
1250 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1251 |     variance: 0.1
1252 |     variance: 0.2
1253 |     variance: 0.2
1254 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1255 |   }
1256 | }
1257 | layer {  # conv13 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv13" feature map
1258 |   name: "conv13_mbox_loc"
1259 |   type: "Convolution"
1260 |   bottom: "conv13"
1261 |   top: "conv13_mbox_loc"
1262 |   param {  # weight multipliers (used by the solver during training)
1263 |     lr_mult: 1.0
1264 |     decay_mult: 1.0
1265 |   }
1266 |   param {  # bias multipliers: 2x learning rate, no weight decay
1267 |     lr_mult: 2.0
1268 |     decay_mult: 0.0
1269 |   }
1270 |   convolution_param {
1271 |     num_output: 24  # 6 priors per cell x 4 box-regression values (see conv13_mbox_priorbox for the 6 priors)
1272 |     kernel_size: 1
1273 |     weight_filler {
1274 |       type: "msra"
1275 |     }
1276 |     bias_filler {
1277 |       type: "constant"
1278 |       value: 0.0
1279 |     }
1280 |   }
1281 | }
1282 | layer {
1283 |   name: "conv13_mbox_loc_perm"
1284 |   type: "Permute"
1285 |   bottom: "conv13_mbox_loc"
1286 |   top: "conv13_mbox_loc_perm"
1287 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1288 |     order: 0
1289 |     order: 2
1290 |     order: 3
1291 |     order: 1
1292 |   }
1293 | }
1294 | layer {
1295 |   name: "conv13_mbox_loc_flat"
1296 |   type: "Flatten"
1297 |   bottom: "conv13_mbox_loc_perm"
1298 |   top: "conv13_mbox_loc_flat"
1299 |   flatten_param {
1300 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1301 |   }
1302 | }
1303 | layer {
1304 |   name: "conv13_mbox_conf"
1305 |   type: "Convolution"
1306 |   bottom: "conv13"
1307 |   top: "conv13_mbox_conf"
1308 |   param {
1309 |     lr_mult: 1.0
1310 |     decay_mult: 1.0
1311 |   }
1312 |   param {
1313 |     lr_mult: 2.0
1314 |     decay_mult: 0.0
1315 |   }
1316 |   convolution_param {
1317 |     num_output: 126  # 6 priors per cell x 21 class scores (21 = num_classes of detection_out)
1318 |     kernel_size: 1
1319 |     weight_filler {
1320 |       type: "msra"
1321 |     }
1322 |     bias_filler {
1323 |       type: "constant"
1324 |       value: 0.0
1325 |     }
1326 |   }
1327 | }
1328 | layer {
1329 |   name: "conv13_mbox_conf_perm"
1330 |   type: "Permute"
1331 |   bottom: "conv13_mbox_conf"
1332 |   top: "conv13_mbox_conf_perm"
1333 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1334 |     order: 0
1335 |     order: 2
1336 |     order: 3
1337 |     order: 1
1338 |   }
1339 | }
1340 | layer {
1341 |   name: "conv13_mbox_conf_flat"
1342 |   type: "Flatten"
1343 |   bottom: "conv13_mbox_conf_perm"
1344 |   top: "conv13_mbox_conf_flat"
1345 |   flatten_param {
1346 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1347 |   }
1348 | }
1349 | layer {
1350 |   name: "conv13_mbox_priorbox"
1351 |   type: "PriorBox"
1352 |   bottom: "conv13"  # feature map that defines the grid of prior-box centers
1353 |   bottom: "data"  # network input, used for the image size
1354 |   top: "conv13_mbox_priorbox"
1355 |   prior_box_param {
1356 |     min_size: 105.0  # side of the smallest square prior
1357 |     max_size: 150.0  # adds a second square prior of side sqrt(min_size * max_size)
1358 |     aspect_ratio: 2.0
1359 |     aspect_ratio: 3.0
1360 |     flip: true  # also generate reciprocal ratios (1/2, 1/3) -> 2 squares + 4 ratio boxes = 6 priors per cell
1361 |     clip: false  # priors are not clamped to the image bounds
1362 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1363 |     variance: 0.1
1364 |     variance: 0.2
1365 |     variance: 0.2
1366 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1367 |   }
1368 | }
1369 | layer {  # conv14_2 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv14_2" feature map
1370 |   name: "conv14_2_mbox_loc"
1371 |   type: "Convolution"
1372 |   bottom: "conv14_2"
1373 |   top: "conv14_2_mbox_loc"
1374 |   param {  # weight multipliers (used by the solver during training)
1375 |     lr_mult: 1.0
1376 |     decay_mult: 1.0
1377 |   }
1378 |   param {  # bias multipliers: 2x learning rate, no weight decay
1379 |     lr_mult: 2.0
1380 |     decay_mult: 0.0
1381 |   }
1382 |   convolution_param {
1383 |     num_output: 24  # 6 priors per cell x 4 box-regression values (see conv14_2_mbox_priorbox for the 6 priors)
1384 |     kernel_size: 1
1385 |     weight_filler {
1386 |       type: "msra"
1387 |     }
1388 |     bias_filler {
1389 |       type: "constant"
1390 |       value: 0.0
1391 |     }
1392 |   }
1393 | }
1394 | layer {
1395 |   name: "conv14_2_mbox_loc_perm"
1396 |   type: "Permute"
1397 |   bottom: "conv14_2_mbox_loc"
1398 |   top: "conv14_2_mbox_loc_perm"
1399 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1400 |     order: 0
1401 |     order: 2
1402 |     order: 3
1403 |     order: 1
1404 |   }
1405 | }
1406 | layer {
1407 |   name: "conv14_2_mbox_loc_flat"
1408 |   type: "Flatten"
1409 |   bottom: "conv14_2_mbox_loc_perm"
1410 |   top: "conv14_2_mbox_loc_flat"
1411 |   flatten_param {
1412 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1413 |   }
1414 | }
1415 | layer {
1416 |   name: "conv14_2_mbox_conf"
1417 |   type: "Convolution"
1418 |   bottom: "conv14_2"
1419 |   top: "conv14_2_mbox_conf"
1420 |   param {
1421 |     lr_mult: 1.0
1422 |     decay_mult: 1.0
1423 |   }
1424 |   param {
1425 |     lr_mult: 2.0
1426 |     decay_mult: 0.0
1427 |   }
1428 |   convolution_param {
1429 |     num_output: 126  # 6 priors per cell x 21 class scores (21 = num_classes of detection_out)
1430 |     kernel_size: 1
1431 |     weight_filler {
1432 |       type: "msra"
1433 |     }
1434 |     bias_filler {
1435 |       type: "constant"
1436 |       value: 0.0
1437 |     }
1438 |   }
1439 | }
1440 | layer {
1441 |   name: "conv14_2_mbox_conf_perm"
1442 |   type: "Permute"
1443 |   bottom: "conv14_2_mbox_conf"
1444 |   top: "conv14_2_mbox_conf_perm"
1445 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1446 |     order: 0
1447 |     order: 2
1448 |     order: 3
1449 |     order: 1
1450 |   }
1451 | }
1452 | layer {
1453 |   name: "conv14_2_mbox_conf_flat"
1454 |   type: "Flatten"
1455 |   bottom: "conv14_2_mbox_conf_perm"
1456 |   top: "conv14_2_mbox_conf_flat"
1457 |   flatten_param {
1458 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1459 |   }
1460 | }
1461 | layer {
1462 |   name: "conv14_2_mbox_priorbox"
1463 |   type: "PriorBox"
1464 |   bottom: "conv14_2"  # feature map that defines the grid of prior-box centers
1465 |   bottom: "data"  # network input, used for the image size
1466 |   top: "conv14_2_mbox_priorbox"
1467 |   prior_box_param {
1468 |     min_size: 150.0  # side of the smallest square prior
1469 |     max_size: 195.0  # adds a second square prior of side sqrt(min_size * max_size)
1470 |     aspect_ratio: 2.0
1471 |     aspect_ratio: 3.0
1472 |     flip: true  # also generate reciprocal ratios (1/2, 1/3) -> 2 squares + 4 ratio boxes = 6 priors per cell
1473 |     clip: false  # priors are not clamped to the image bounds
1474 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1475 |     variance: 0.1
1476 |     variance: 0.2
1477 |     variance: 0.2
1478 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1479 |   }
1480 | }
1481 | layer {  # conv15_2 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv15_2" feature map
1482 |   name: "conv15_2_mbox_loc"
1483 |   type: "Convolution"
1484 |   bottom: "conv15_2"
1485 |   top: "conv15_2_mbox_loc"
1486 |   param {  # weight multipliers (used by the solver during training)
1487 |     lr_mult: 1.0
1488 |     decay_mult: 1.0
1489 |   }
1490 |   param {  # bias multipliers: 2x learning rate, no weight decay
1491 |     lr_mult: 2.0
1492 |     decay_mult: 0.0
1493 |   }
1494 |   convolution_param {
1495 |     num_output: 24  # 6 priors per cell x 4 box-regression values (see conv15_2_mbox_priorbox for the 6 priors)
1496 |     kernel_size: 1
1497 |     weight_filler {
1498 |       type: "msra"
1499 |     }
1500 |     bias_filler {
1501 |       type: "constant"
1502 |       value: 0.0
1503 |     }
1504 |   }
1505 | }
1506 | layer {
1507 |   name: "conv15_2_mbox_loc_perm"
1508 |   type: "Permute"
1509 |   bottom: "conv15_2_mbox_loc"
1510 |   top: "conv15_2_mbox_loc_perm"
1511 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1512 |     order: 0
1513 |     order: 2
1514 |     order: 3
1515 |     order: 1
1516 |   }
1517 | }
1518 | layer {
1519 |   name: "conv15_2_mbox_loc_flat"
1520 |   type: "Flatten"
1521 |   bottom: "conv15_2_mbox_loc_perm"
1522 |   top: "conv15_2_mbox_loc_flat"
1523 |   flatten_param {
1524 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1525 |   }
1526 | }
1527 | layer {
1528 |   name: "conv15_2_mbox_conf"
1529 |   type: "Convolution"
1530 |   bottom: "conv15_2"
1531 |   top: "conv15_2_mbox_conf"
1532 |   param {
1533 |     lr_mult: 1.0
1534 |     decay_mult: 1.0
1535 |   }
1536 |   param {
1537 |     lr_mult: 2.0
1538 |     decay_mult: 0.0
1539 |   }
1540 |   convolution_param {
1541 |     num_output: 126  # 6 priors per cell x 21 class scores (21 = num_classes of detection_out)
1542 |     kernel_size: 1
1543 |     weight_filler {
1544 |       type: "msra"
1545 |     }
1546 |     bias_filler {
1547 |       type: "constant"
1548 |       value: 0.0
1549 |     }
1550 |   }
1551 | }
1552 | layer {
1553 |   name: "conv15_2_mbox_conf_perm"
1554 |   type: "Permute"
1555 |   bottom: "conv15_2_mbox_conf"
1556 |   top: "conv15_2_mbox_conf_perm"
1557 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1558 |     order: 0
1559 |     order: 2
1560 |     order: 3
1561 |     order: 1
1562 |   }
1563 | }
1564 | layer {
1565 |   name: "conv15_2_mbox_conf_flat"
1566 |   type: "Flatten"
1567 |   bottom: "conv15_2_mbox_conf_perm"
1568 |   top: "conv15_2_mbox_conf_flat"
1569 |   flatten_param {
1570 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1571 |   }
1572 | }
1573 | layer {
1574 |   name: "conv15_2_mbox_priorbox"
1575 |   type: "PriorBox"
1576 |   bottom: "conv15_2"  # feature map that defines the grid of prior-box centers
1577 |   bottom: "data"  # network input, used for the image size
1578 |   top: "conv15_2_mbox_priorbox"
1579 |   prior_box_param {
1580 |     min_size: 195.0  # side of the smallest square prior
1581 |     max_size: 240.0  # adds a second square prior of side sqrt(min_size * max_size)
1582 |     aspect_ratio: 2.0
1583 |     aspect_ratio: 3.0
1584 |     flip: true  # also generate reciprocal ratios (1/2, 1/3) -> 2 squares + 4 ratio boxes = 6 priors per cell
1585 |     clip: false  # priors are not clamped to the image bounds
1586 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1587 |     variance: 0.1
1588 |     variance: 0.2
1589 |     variance: 0.2
1590 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1591 |   }
1592 | }
1593 | layer {  # conv16_2 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv16_2" feature map
1594 |   name: "conv16_2_mbox_loc"
1595 |   type: "Convolution"
1596 |   bottom: "conv16_2"
1597 |   top: "conv16_2_mbox_loc"
1598 |   param {  # weight multipliers (used by the solver during training)
1599 |     lr_mult: 1.0
1600 |     decay_mult: 1.0
1601 |   }
1602 |   param {  # bias multipliers: 2x learning rate, no weight decay
1603 |     lr_mult: 2.0
1604 |     decay_mult: 0.0
1605 |   }
1606 |   convolution_param {
1607 |     num_output: 24  # 6 priors per cell x 4 box-regression values (see conv16_2_mbox_priorbox for the 6 priors)
1608 |     kernel_size: 1
1609 |     weight_filler {
1610 |       type: "msra"
1611 |     }
1612 |     bias_filler {
1613 |       type: "constant"
1614 |       value: 0.0
1615 |     }
1616 |   }
1617 | }
1618 | layer {
1619 |   name: "conv16_2_mbox_loc_perm"
1620 |   type: "Permute"
1621 |   bottom: "conv16_2_mbox_loc"
1622 |   top: "conv16_2_mbox_loc_perm"
1623 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1624 |     order: 0
1625 |     order: 2
1626 |     order: 3
1627 |     order: 1
1628 |   }
1629 | }
1630 | layer {
1631 |   name: "conv16_2_mbox_loc_flat"
1632 |   type: "Flatten"
1633 |   bottom: "conv16_2_mbox_loc_perm"
1634 |   top: "conv16_2_mbox_loc_flat"
1635 |   flatten_param {
1636 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1637 |   }
1638 | }
1639 | layer {
1640 |   name: "conv16_2_mbox_conf"
1641 |   type: "Convolution"
1642 |   bottom: "conv16_2"
1643 |   top: "conv16_2_mbox_conf"
1644 |   param {
1645 |     lr_mult: 1.0
1646 |     decay_mult: 1.0
1647 |   }
1648 |   param {
1649 |     lr_mult: 2.0
1650 |     decay_mult: 0.0
1651 |   }
1652 |   convolution_param {
1653 |     num_output: 126  # 6 priors per cell x 21 class scores (21 = num_classes of detection_out)
1654 |     kernel_size: 1
1655 |     weight_filler {
1656 |       type: "msra"
1657 |     }
1658 |     bias_filler {
1659 |       type: "constant"
1660 |       value: 0.0
1661 |     }
1662 |   }
1663 | }
1664 | layer {
1665 |   name: "conv16_2_mbox_conf_perm"
1666 |   type: "Permute"
1667 |   bottom: "conv16_2_mbox_conf"
1668 |   top: "conv16_2_mbox_conf_perm"
1669 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1670 |     order: 0
1671 |     order: 2
1672 |     order: 3
1673 |     order: 1
1674 |   }
1675 | }
1676 | layer {
1677 |   name: "conv16_2_mbox_conf_flat"
1678 |   type: "Flatten"
1679 |   bottom: "conv16_2_mbox_conf_perm"
1680 |   top: "conv16_2_mbox_conf_flat"
1681 |   flatten_param {
1682 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1683 |   }
1684 | }
1685 | layer {
1686 |   name: "conv16_2_mbox_priorbox"
1687 |   type: "PriorBox"
1688 |   bottom: "conv16_2"  # feature map that defines the grid of prior-box centers
1689 |   bottom: "data"  # network input, used for the image size
1690 |   top: "conv16_2_mbox_priorbox"
1691 |   prior_box_param {
1692 |     min_size: 240.0  # side of the smallest square prior
1693 |     max_size: 285.0  # adds a second square prior of side sqrt(min_size * max_size)
1694 |     aspect_ratio: 2.0
1695 |     aspect_ratio: 3.0
1696 |     flip: true  # also generate reciprocal ratios (1/2, 1/3) -> 2 squares + 4 ratio boxes = 6 priors per cell
1697 |     clip: false  # priors are not clamped to the image bounds
1698 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1699 |     variance: 0.1
1700 |     variance: 0.2
1701 |     variance: 0.2
1702 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1703 |   }
1704 | }
1705 | layer {  # conv17_2 detection head: box regression (loc), class scores (conf) and prior boxes for the "conv17_2" feature map
1706 |   name: "conv17_2_mbox_loc"
1707 |   type: "Convolution"
1708 |   bottom: "conv17_2"
1709 |   top: "conv17_2_mbox_loc"
1710 |   param {  # weight multipliers (used by the solver during training)
1711 |     lr_mult: 1.0
1712 |     decay_mult: 1.0
1713 |   }
1714 |   param {  # bias multipliers: 2x learning rate, no weight decay
1715 |     lr_mult: 2.0
1716 |     decay_mult: 0.0
1717 |   }
1718 |   convolution_param {
1719 |     num_output: 24  # 6 priors per cell x 4 box-regression values (see conv17_2_mbox_priorbox for the 6 priors)
1720 |     kernel_size: 1
1721 |     weight_filler {
1722 |       type: "msra"
1723 |     }
1724 |     bias_filler {
1725 |       type: "constant"
1726 |       value: 0.0
1727 |     }
1728 |   }
1729 | }
1730 | layer {
1731 |   name: "conv17_2_mbox_loc_perm"
1732 |   type: "Permute"
1733 |   bottom: "conv17_2_mbox_loc"
1734 |   top: "conv17_2_mbox_loc_perm"
1735 |   permute_param {  # reorder axes (N, C, H, W) -> (N, H, W, C) so each cell's predictions are contiguous
1736 |     order: 0
1737 |     order: 2
1738 |     order: 3
1739 |     order: 1
1740 |   }
1741 | }
1742 | layer {
1743 |   name: "conv17_2_mbox_loc_flat"
1744 |   type: "Flatten"
1745 |   bottom: "conv17_2_mbox_loc_perm"
1746 |   top: "conv17_2_mbox_loc_flat"
1747 |   flatten_param {
1748 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1749 |   }
1750 | }
1751 | layer {
1752 |   name: "conv17_2_mbox_conf"
1753 |   type: "Convolution"
1754 |   bottom: "conv17_2"
1755 |   top: "conv17_2_mbox_conf"
1756 |   param {
1757 |     lr_mult: 1.0
1758 |     decay_mult: 1.0
1759 |   }
1760 |   param {
1761 |     lr_mult: 2.0
1762 |     decay_mult: 0.0
1763 |   }
1764 |   convolution_param {
1765 |     num_output: 126  # 6 priors per cell x 21 class scores (21 = num_classes of detection_out)
1766 |     kernel_size: 1
1767 |     weight_filler {
1768 |       type: "msra"
1769 |     }
1770 |     bias_filler {
1771 |       type: "constant"
1772 |       value: 0.0
1773 |     }
1774 |   }
1775 | }
1776 | layer {
1777 |   name: "conv17_2_mbox_conf_perm"
1778 |   type: "Permute"
1779 |   bottom: "conv17_2_mbox_conf"
1780 |   top: "conv17_2_mbox_conf_perm"
1781 |   permute_param {  # same (N, C, H, W) -> (N, H, W, C) reordering as the loc branch
1782 |     order: 0
1783 |     order: 2
1784 |     order: 3
1785 |     order: 1
1786 |   }
1787 | }
1788 | layer {
1789 |   name: "conv17_2_mbox_conf_flat"
1790 |   type: "Flatten"
1791 |   bottom: "conv17_2_mbox_conf_perm"
1792 |   top: "conv17_2_mbox_conf_flat"
1793 |   flatten_param {
1794 |     axis: 1  # collapse every axis after the batch axis into one vector per image
1795 |   }
1796 | }
1797 | layer {
1798 |   name: "conv17_2_mbox_priorbox"
1799 |   type: "PriorBox"
1800 |   bottom: "conv17_2"  # feature map that defines the grid of prior-box centers
1801 |   bottom: "data"  # network input, used for the image size
1802 |   top: "conv17_2_mbox_priorbox"
1803 |   prior_box_param {
1804 |     min_size: 285.0  # side of the smallest square prior
1805 |     max_size: 300.0  # adds a second square prior of side sqrt(min_size * max_size)
1806 |     aspect_ratio: 2.0
1807 |     aspect_ratio: 3.0
1808 |     flip: true  # also generate reciprocal ratios (1/2, 1/3) -> 2 squares + 4 ratio boxes = 6 priors per cell
1809 |     clip: false  # priors are not clamped to the image bounds
1810 |     variance: 0.1  # four variances used when encoding/decoding box offsets; NOTE(review): presumably in x, y, w, h order - confirm
1811 |     variance: 0.1
1812 |     variance: 0.2
1813 |     variance: 0.2
1814 |     offset: 0.5  # prior centers are placed at the center of each feature-map cell
1815 |   }
1816 | }
1817 | layer {  # mbox_loc: joins the flattened box-regression outputs of all six detection heads
1818 |   name: "mbox_loc"
1819 |   type: "Concat"
1820 |   bottom: "conv11_mbox_loc_flat"  # the head order here must match the order used in mbox_conf and mbox_priorbox
1821 |   bottom: "conv13_mbox_loc_flat"
1822 |   bottom: "conv14_2_mbox_loc_flat"
1823 |   bottom: "conv15_2_mbox_loc_flat"
1824 |   bottom: "conv16_2_mbox_loc_flat"
1825 |   bottom: "conv17_2_mbox_loc_flat"
1826 |   top: "mbox_loc"
1827 |   concat_param {
1828 |     axis: 1  # the flattened inputs are 2-D (batch, values); join along the values axis
1829 |   }
1830 | }
1831 | layer {  # mbox_conf: joins the flattened class-score outputs of all six detection heads
1832 |   name: "mbox_conf"
1833 |   type: "Concat"
1834 |   bottom: "conv11_mbox_conf_flat"
1835 |   bottom: "conv13_mbox_conf_flat"
1836 |   bottom: "conv14_2_mbox_conf_flat"
1837 |   bottom: "conv15_2_mbox_conf_flat"
1838 |   bottom: "conv16_2_mbox_conf_flat"
1839 |   bottom: "conv17_2_mbox_conf_flat"
1840 |   top: "mbox_conf"
1841 |   concat_param {
1842 |     axis: 1
1843 |   }
1844 | }
1845 | layer {  # mbox_priorbox: joins the prior boxes of all six detection heads
1846 |   name: "mbox_priorbox"
1847 |   type: "Concat"
1848 |   bottom: "conv11_mbox_priorbox"
1849 |   bottom: "conv13_mbox_priorbox"
1850 |   bottom: "conv14_2_mbox_priorbox"
1851 |   bottom: "conv15_2_mbox_priorbox"
1852 |   bottom: "conv16_2_mbox_priorbox"
1853 |   bottom: "conv17_2_mbox_priorbox"
1854 |   top: "mbox_priorbox"
1855 |   concat_param {
1856 |     axis: 2  # NOTE(review): PriorBox tops are presumably shaped (1, 2, num_priors * 4), hence axis 2 rather than 1 - confirm
1857 |   }
1858 | }
1859 | layer {  # turn the flat score vector into per-prior class probabilities, then flatten again for detection_out
1860 |   name: "mbox_conf_reshape"
1861 |   type: "Reshape"
1862 |   bottom: "mbox_conf"
1863 |   top: "mbox_conf_reshape"
1864 |   reshape_param {
1865 |     shape {
1866 |       dim: 0  # 0 = copy this dimension (batch) from the bottom blob
1867 |       dim: -1  # -1 = inferred from the remaining element count (total number of priors)
1868 |       dim: 21  # scores per prior; must equal num_classes in detection_out
1869 |     }
1870 |   }
1871 | }
1872 | layer {
1873 |   name: "mbox_conf_softmax"
1874 |   type: "Softmax"
1875 |   bottom: "mbox_conf_reshape"
1876 |   top: "mbox_conf_softmax"
1877 |   softmax_param {
1878 |     axis: 2  # normalize over the 21-way class axis created by the reshape above
1879 |   }
1880 | }
1881 | layer {
1882 |   name: "mbox_conf_flatten"
1883 |   type: "Flatten"
1884 |   bottom: "mbox_conf_softmax"
1885 |   top: "mbox_conf_flatten"
1886 |   flatten_param {
1887 |     axis: 1  # back to one flat vector per image
1888 |   }
1889 | }
1890 | layer {  # detection_out: decodes box offsets against the priors, applies NMS and emits the final detections
1891 |   name: "detection_out"
1892 |   type: "DetectionOutput"
1893 |   bottom: "mbox_loc"  # box-regression values for every prior
1894 |   bottom: "mbox_conf_flatten"  # softmax class probabilities for every prior
1895 |   bottom: "mbox_priorbox"  # prior boxes and their variances
1896 |   top: "detection_out"
1897 |   include {
1898 |     phase: TEST  # layer is only part of the net in the TEST phase
1899 |   }
1900 |   detection_output_param {
1901 |     num_classes: 21  # must match the last dim of mbox_conf_reshape; count includes the background class
1902 |     share_location: true  # one set of box offsets per prior, shared by all classes
1903 |     background_label_id: 0  # class index 0 is background
1904 |     nms_param {
1905 |       nms_threshold: 0.45  # overlap threshold used by non-maximum suppression
1906 |       top_k: 100  # NOTE(review): presumably the max number of candidates kept per class for NMS - confirm against caffe.proto
1907 |     }
1908 |     code_type: CENTER_SIZE  # box offsets are encoded relative to the prior's center and size
1909 |     keep_top_k: 100  # max detections kept per image after NMS
1910 |     confidence_threshold: 0.25  # detections scoring below this are not considered
1911 |   }
1912 | }
1913 | 


--------------------------------------------------------------------------------
/SSD/MobileNet/MobileNetSSD_deploy_custom.prototxt:
--------------------------------------------------------------------------------
   1 | name: "MobileNet-SSD"  # network name
   2 | input: "data"  # name of the input blob consumed by the first layer
   3 | input_shape {  # shape of "data" in Caffe's (N, C, H, W) blob order
   4 |   dim: 1  # batch size
   5 |   dim: 3  # channels
   6 |   dim: 300  # height
   7 |   dim: 300  # width
   8 | }
   9 | layer {  # conv0: stride-2 3x3 convolution on the input image (3 -> 32 channels), followed by an in-place ReLU
  10 |   name: "conv0"
  11 |   type: "Convolution"
  12 |   bottom: "data"
  13 |   top: "conv0"
  14 |   param {  # single param block: the layer has a weight blob only (bias_term is false)
  15 |     lr_mult: 1.0
  16 |     decay_mult: 1.0
  17 |   }
  18 |   convolution_param {
  19 |     num_output: 32
  20 |     bias_term: false  # no bias blob is created, so the bias_filler below has no effect
  21 |     pad: 1
  22 |     kernel_size: 3
  23 |     stride: 2  # downsamples: roughly halves the spatial height and width
  24 |     weight_filler {  # initializer; replaced when weights are copied from a .caffemodel
  25 |       type: "msra"
  26 |     }
  27 |     bias_filler {  # unused while bias_term is false
  28 |       type: "constant"
  29 |       value: 0.0
  30 |     }
  31 |   }
  32 | }
  33 | layer {
  34 |   name: "conv0/relu"
  35 |   type: "ReLU"
  36 |   bottom: "conv0"
  37 |   top: "conv0"  # top == bottom: the activation is computed in place
  38 | }
  39 | layer {  # conv1 unit: 3x3 depthwise conv -> ReLU -> 1x1 pointwise conv (32 -> 64) -> ReLU, all without bias
  40 |   name: "conv1/dw"
  41 |   type: "Convolution"
  42 |   bottom: "conv0"
  43 |   top: "conv1/dw"
  44 |   param {  # single param block: weight blob only (bias_term is false)
  45 |     lr_mult: 1.0
  46 |     decay_mult: 1.0
  47 |   }
  48 |   convolution_param {
  49 |     num_output: 32
  50 |     bias_term: false  # no bias blob is created, so the bias_filler below has no effect
  51 |     pad: 1  # keeps the spatial size for a 3x3 kernel at the default stride
  52 |     kernel_size: 3
  53 |     group: 32  # one group per input channel of "conv0" (32): depthwise convolution
  54 |     engine: CAFFE  # use Caffe's native convolution implementation
  55 |     weight_filler {
  56 |       type: "msra"
  57 |     }
  58 |     bias_filler {  # unused while bias_term is false
  59 |       type: "constant"
  60 |       value: 0.0
  61 |     }
  62 |   }
  63 | }
  64 | layer {
  65 |   name: "conv1/dw/relu"
  66 |   type: "ReLU"
  67 |   bottom: "conv1/dw"
  68 |   top: "conv1/dw"  # in-place activation
  69 | }
  70 | layer {
  71 |   name: "conv1"
  72 |   type: "Convolution"
  73 |   bottom: "conv1/dw"
  74 |   top: "conv1"
  75 |   param {
  76 |     lr_mult: 1.0
  77 |     decay_mult: 1.0
  78 |   }
  79 |   convolution_param {
  80 |     num_output: 64  # pointwise conv doubles the channel count (32 -> 64)
  81 |     bias_term: false  # no bias blob is created, so the bias_filler below has no effect
  82 |     kernel_size: 1
  83 |     weight_filler {
  84 |       type: "msra"
  85 |     }
  86 |     bias_filler {  # unused while bias_term is false
  87 |       type: "constant"
  88 |       value: 0.0
  89 |     }
  90 |   }
  91 | }
  92 | layer {
  93 |   name: "conv1/relu"
  94 |   type: "ReLU"
  95 |   bottom: "conv1"
  96 |   top: "conv1"  # in-place activation
  97 | }
  98 | layer {
         # Depthwise 3x3 stage of the conv2 unit: group == num_output == 64 (one filter
         # per channel of "conv1"), stride 2 so this layer downsamples spatially.
         # bias_term is false, so bias_filler is unused.
  99 |   name: "conv2/dw"
 100 |   type: "Convolution"
 101 |   bottom: "conv1"
 102 |   top: "conv2/dw"
 103 |   param {
 104 |     lr_mult: 1.0
 105 |     decay_mult: 1.0
 106 |   }
 107 |   convolution_param {
 108 |     num_output: 64
 109 |     bias_term: false
 110 |     pad: 1
 111 |     kernel_size: 3
 112 |     stride: 2
 113 |     group: 64
 114 |     engine: CAFFE
 115 |     weight_filler {
 116 |       type: "msra"
 117 |     }
 118 |     bias_filler {
 119 |       type: "constant"
 120 |       value: 0.0
 121 |     }
 122 |   }
 123 | }
 124 | layer {
         # ReLU applied in place on the depthwise output.
 125 |   name: "conv2/dw/relu"
 126 |   type: "ReLU"
 127 |   bottom: "conv2/dw"
 128 |   top: "conv2/dw"
 129 | }
 130 | layer {
         # Pointwise 1x1 convolution: expands the 64 depthwise channels to 128 channels.
 131 |   name: "conv2"
 132 |   type: "Convolution"
 133 |   bottom: "conv2/dw"
 134 |   top: "conv2"
 135 |   param {
 136 |     lr_mult: 1.0
 137 |     decay_mult: 1.0
 138 |   }
 139 |   convolution_param {
 140 |     num_output: 128
 141 |     bias_term: false
 142 |     kernel_size: 1
 143 |     weight_filler {
 144 |       type: "msra"
 145 |     }
 146 |     bias_filler {
 147 |       type: "constant"
 148 |       value: 0.0
 149 |     }
 150 |   }
 151 | }
 152 | layer {
         # ReLU applied in place on the pointwise output.
 153 |   name: "conv2/relu"
 154 |   type: "ReLU"
 155 |   bottom: "conv2"
 156 |   top: "conv2"
 157 | }
 158 | layer {
         # Depthwise 3x3 stage of the conv3 unit: group == num_output == 128 (one filter
         # per channel of "conv2"), default stride. bias_term is false, so bias_filler
         # is unused.
 159 |   name: "conv3/dw"
 160 |   type: "Convolution"
 161 |   bottom: "conv2"
 162 |   top: "conv3/dw"
 163 |   param {
 164 |     lr_mult: 1.0
 165 |     decay_mult: 1.0
 166 |   }
 167 |   convolution_param {
 168 |     num_output: 128
 169 |     bias_term: false
 170 |     pad: 1
 171 |     kernel_size: 3
 172 |     group: 128
 173 |     engine: CAFFE
 174 |     weight_filler {
 175 |       type: "msra"
 176 |     }
 177 |     bias_filler {
 178 |       type: "constant"
 179 |       value: 0.0
 180 |     }
 181 |   }
 182 | }
 183 | layer {
         # ReLU applied in place on the depthwise output.
 184 |   name: "conv3/dw/relu"
 185 |   type: "ReLU"
 186 |   bottom: "conv3/dw"
 187 |   top: "conv3/dw"
 188 | }
 189 | layer {
         # Pointwise 1x1 convolution: 128 channels in, 128 channels out.
 190 |   name: "conv3"
 191 |   type: "Convolution"
 192 |   bottom: "conv3/dw"
 193 |   top: "conv3"
 194 |   param {
 195 |     lr_mult: 1.0
 196 |     decay_mult: 1.0
 197 |   }
 198 |   convolution_param {
 199 |     num_output: 128
 200 |     bias_term: false
 201 |     kernel_size: 1
 202 |     weight_filler {
 203 |       type: "msra"
 204 |     }
 205 |     bias_filler {
 206 |       type: "constant"
 207 |       value: 0.0
 208 |     }
 209 |   }
 210 | }
 211 | layer {
         # ReLU applied in place on the pointwise output.
 212 |   name: "conv3/relu"
 213 |   type: "ReLU"
 214 |   bottom: "conv3"
 215 |   top: "conv3"
 216 | }
 217 | layer {
         # Depthwise 3x3 stage of the conv4 unit: group == num_output == 128 (one filter
         # per channel of "conv3"), stride 2 so this layer downsamples spatially.
         # bias_term is false, so bias_filler is unused.
 218 |   name: "conv4/dw"
 219 |   type: "Convolution"
 220 |   bottom: "conv3"
 221 |   top: "conv4/dw"
 222 |   param {
 223 |     lr_mult: 1.0
 224 |     decay_mult: 1.0
 225 |   }
 226 |   convolution_param {
 227 |     num_output: 128
 228 |     bias_term: false
 229 |     pad: 1
 230 |     kernel_size: 3
 231 |     stride: 2
 232 |     group: 128
 233 |     engine: CAFFE
 234 |     weight_filler {
 235 |       type: "msra"
 236 |     }
 237 |     bias_filler {
 238 |       type: "constant"
 239 |       value: 0.0
 240 |     }
 241 |   }
 242 | }
 243 | layer {
         # ReLU applied in place on the depthwise output.
 244 |   name: "conv4/dw/relu"
 245 |   type: "ReLU"
 246 |   bottom: "conv4/dw"
 247 |   top: "conv4/dw"
 248 | }
 249 | layer {
         # Pointwise 1x1 convolution: expands the 128 depthwise channels to 256 channels.
 250 |   name: "conv4"
 251 |   type: "Convolution"
 252 |   bottom: "conv4/dw"
 253 |   top: "conv4"
 254 |   param {
 255 |     lr_mult: 1.0
 256 |     decay_mult: 1.0
 257 |   }
 258 |   convolution_param {
 259 |     num_output: 256
 260 |     bias_term: false
 261 |     kernel_size: 1
 262 |     weight_filler {
 263 |       type: "msra"
 264 |     }
 265 |     bias_filler {
 266 |       type: "constant"
 267 |       value: 0.0
 268 |     }
 269 |   }
 270 | }
 271 | layer {
         # ReLU applied in place on the pointwise output.
 272 |   name: "conv4/relu"
 273 |   type: "ReLU"
 274 |   bottom: "conv4"
 275 |   top: "conv4"
 276 | }
 277 | layer {
         # Depthwise 3x3 stage of the conv5 unit: group == num_output == 256 (one filter
         # per channel of "conv4"), default stride. bias_term is false, so bias_filler
         # is unused.
 278 |   name: "conv5/dw"
 279 |   type: "Convolution"
 280 |   bottom: "conv4"
 281 |   top: "conv5/dw"
 282 |   param {
 283 |     lr_mult: 1.0
 284 |     decay_mult: 1.0
 285 |   }
 286 |   convolution_param {
 287 |     num_output: 256
 288 |     bias_term: false
 289 |     pad: 1
 290 |     kernel_size: 3
 291 |     group: 256
 292 |     engine: CAFFE
 293 |     weight_filler {
 294 |       type: "msra"
 295 |     }
 296 |     bias_filler {
 297 |       type: "constant"
 298 |       value: 0.0
 299 |     }
 300 |   }
 301 | }
 302 | layer {
         # ReLU applied in place on the depthwise output.
 303 |   name: "conv5/dw/relu"
 304 |   type: "ReLU"
 305 |   bottom: "conv5/dw"
 306 |   top: "conv5/dw"
 307 | }
 308 | layer {
         # Pointwise 1x1 convolution: 256 channels in, 256 channels out.
 309 |   name: "conv5"
 310 |   type: "Convolution"
 311 |   bottom: "conv5/dw"
 312 |   top: "conv5"
 313 |   param {
 314 |     lr_mult: 1.0
 315 |     decay_mult: 1.0
 316 |   }
 317 |   convolution_param {
 318 |     num_output: 256
 319 |     bias_term: false
 320 |     kernel_size: 1
 321 |     weight_filler {
 322 |       type: "msra"
 323 |     }
 324 |     bias_filler {
 325 |       type: "constant"
 326 |       value: 0.0
 327 |     }
 328 |   }
 329 | }
 330 | layer {
         # ReLU applied in place on the pointwise output.
 331 |   name: "conv5/relu"
 332 |   type: "ReLU"
 333 |   bottom: "conv5"
 334 |   top: "conv5"
 335 | }
 336 | layer {
         # Depthwise 3x3 stage of the conv6 unit: group == num_output == 256 (one filter
         # per channel of "conv5"), stride 2 so this layer downsamples spatially.
         # bias_term is false, so bias_filler is unused.
 337 |   name: "conv6/dw"
 338 |   type: "Convolution"
 339 |   bottom: "conv5"
 340 |   top: "conv6/dw"
 341 |   param {
 342 |     lr_mult: 1.0
 343 |     decay_mult: 1.0
 344 |   }
 345 |   convolution_param {
 346 |     num_output: 256
 347 |     bias_term: false
 348 |     pad: 1
 349 |     kernel_size: 3
 350 |     stride: 2
 351 |     group: 256
 352 |     engine: CAFFE
 353 |     weight_filler {
 354 |       type: "msra"
 355 |     }
 356 |     bias_filler {
 357 |       type: "constant"
 358 |       value: 0.0
 359 |     }
 360 |   }
 361 | }
 362 | layer {
         # ReLU applied in place on the depthwise output.
 363 |   name: "conv6/dw/relu"
 364 |   type: "ReLU"
 365 |   bottom: "conv6/dw"
 366 |   top: "conv6/dw"
 367 | }
 368 | layer {
         # Pointwise 1x1 convolution: expands the 256 depthwise channels to 512 channels.
 369 |   name: "conv6"
 370 |   type: "Convolution"
 371 |   bottom: "conv6/dw"
 372 |   top: "conv6"
 373 |   param {
 374 |     lr_mult: 1.0
 375 |     decay_mult: 1.0
 376 |   }
 377 |   convolution_param {
 378 |     num_output: 512
 379 |     bias_term: false
 380 |     kernel_size: 1
 381 |     weight_filler {
 382 |       type: "msra"
 383 |     }
 384 |     bias_filler {
 385 |       type: "constant"
 386 |       value: 0.0
 387 |     }
 388 |   }
 389 | }
 390 | layer {
         # ReLU applied in place on the pointwise output.
 391 |   name: "conv6/relu"
 392 |   type: "ReLU"
 393 |   bottom: "conv6"
 394 |   top: "conv6"
 395 | }
 396 | layer {
         # Depthwise 3x3 stage of the conv7 unit: group == num_output == 512 (one filter
         # per channel of "conv6"), default stride. bias_term is false, so bias_filler
         # is unused.
 397 |   name: "conv7/dw"
 398 |   type: "Convolution"
 399 |   bottom: "conv6"
 400 |   top: "conv7/dw"
 401 |   param {
 402 |     lr_mult: 1.0
 403 |     decay_mult: 1.0
 404 |   }
 405 |   convolution_param {
 406 |     num_output: 512
 407 |     bias_term: false
 408 |     pad: 1
 409 |     kernel_size: 3
 410 |     group: 512
 411 |     engine: CAFFE
 412 |     weight_filler {
 413 |       type: "msra"
 414 |     }
 415 |     bias_filler {
 416 |       type: "constant"
 417 |       value: 0.0
 418 |     }
 419 |   }
 420 | }
 421 | layer {
         # ReLU applied in place on the depthwise output.
 422 |   name: "conv7/dw/relu"
 423 |   type: "ReLU"
 424 |   bottom: "conv7/dw"
 425 |   top: "conv7/dw"
 426 | }
 427 | layer {
         # Pointwise 1x1 convolution: 512 channels in, 512 channels out.
 428 |   name: "conv7"
 429 |   type: "Convolution"
 430 |   bottom: "conv7/dw"
 431 |   top: "conv7"
 432 |   param {
 433 |     lr_mult: 1.0
 434 |     decay_mult: 1.0
 435 |   }
 436 |   convolution_param {
 437 |     num_output: 512
 438 |     bias_term: false
 439 |     kernel_size: 1
 440 |     weight_filler {
 441 |       type: "msra"
 442 |     }
 443 |     bias_filler {
 444 |       type: "constant"
 445 |       value: 0.0
 446 |     }
 447 |   }
 448 | }
 449 | layer {
         # ReLU applied in place on the pointwise output.
 450 |   name: "conv7/relu"
 451 |   type: "ReLU"
 452 |   bottom: "conv7"
 453 |   top: "conv7"
 454 | }
 455 | layer {
         # Depthwise 3x3 stage of the conv8 unit: group == num_output == 512 (one filter
         # per channel of "conv7"), default stride. bias_term is false, so bias_filler
         # is unused.
 456 |   name: "conv8/dw"
 457 |   type: "Convolution"
 458 |   bottom: "conv7"
 459 |   top: "conv8/dw"
 460 |   param {
 461 |     lr_mult: 1.0
 462 |     decay_mult: 1.0
 463 |   }
 464 |   convolution_param {
 465 |     num_output: 512
 466 |     bias_term: false
 467 |     pad: 1
 468 |     kernel_size: 3
 469 |     group: 512
 470 |     engine: CAFFE
 471 |     weight_filler {
 472 |       type: "msra"
 473 |     }
 474 |     bias_filler {
 475 |       type: "constant"
 476 |       value: 0.0
 477 |     }
 478 |   }
 479 | }
 480 | layer {
         # ReLU applied in place on the depthwise output.
 481 |   name: "conv8/dw/relu"
 482 |   type: "ReLU"
 483 |   bottom: "conv8/dw"
 484 |   top: "conv8/dw"
 485 | }
 486 | layer {
         # Pointwise 1x1 convolution: 512 channels in, 512 channels out.
 487 |   name: "conv8"
 488 |   type: "Convolution"
 489 |   bottom: "conv8/dw"
 490 |   top: "conv8"
 491 |   param {
 492 |     lr_mult: 1.0
 493 |     decay_mult: 1.0
 494 |   }
 495 |   convolution_param {
 496 |     num_output: 512
 497 |     bias_term: false
 498 |     kernel_size: 1
 499 |     weight_filler {
 500 |       type: "msra"
 501 |     }
 502 |     bias_filler {
 503 |       type: "constant"
 504 |       value: 0.0
 505 |     }
 506 |   }
 507 | }
 508 | layer {
         # ReLU applied in place on the pointwise output.
 509 |   name: "conv8/relu"
 510 |   type: "ReLU"
 511 |   bottom: "conv8"
 512 |   top: "conv8"
 513 | }
 514 | layer {
         # Depthwise 3x3 stage of the conv9 unit: group == num_output == 512 (one filter
         # per channel of "conv8"), default stride. bias_term is false, so bias_filler
         # is unused.
 515 |   name: "conv9/dw"
 516 |   type: "Convolution"
 517 |   bottom: "conv8"
 518 |   top: "conv9/dw"
 519 |   param {
 520 |     lr_mult: 1.0
 521 |     decay_mult: 1.0
 522 |   }
 523 |   convolution_param {
 524 |     num_output: 512
 525 |     bias_term: false
 526 |     pad: 1
 527 |     kernel_size: 3
 528 |     group: 512
 529 |     engine: CAFFE
 530 |     weight_filler {
 531 |       type: "msra"
 532 |     }
 533 |     bias_filler {
 534 |       type: "constant"
 535 |       value: 0.0
 536 |     }
 537 |   }
 538 | }
 539 | layer {
         # ReLU applied in place on the depthwise output.
 540 |   name: "conv9/dw/relu"
 541 |   type: "ReLU"
 542 |   bottom: "conv9/dw"
 543 |   top: "conv9/dw"
 544 | }
 545 | layer {
         # Pointwise 1x1 convolution: 512 channels in, 512 channels out.
 546 |   name: "conv9"
 547 |   type: "Convolution"
 548 |   bottom: "conv9/dw"
 549 |   top: "conv9"
 550 |   param {
 551 |     lr_mult: 1.0
 552 |     decay_mult: 1.0
 553 |   }
 554 |   convolution_param {
 555 |     num_output: 512
 556 |     bias_term: false
 557 |     kernel_size: 1
 558 |     weight_filler {
 559 |       type: "msra"
 560 |     }
 561 |     bias_filler {
 562 |       type: "constant"
 563 |       value: 0.0
 564 |     }
 565 |   }
 566 | }
 567 | layer {
         # ReLU applied in place on the pointwise output.
 568 |   name: "conv9/relu"
 569 |   type: "ReLU"
 570 |   bottom: "conv9"
 571 |   top: "conv9"
 572 | }
 573 | layer {
         # Depthwise 3x3 stage of the conv10 unit: group == num_output == 512 (one filter
         # per channel of "conv9"), default stride. bias_term is false, so bias_filler
         # is unused.
 574 |   name: "conv10/dw"
 575 |   type: "Convolution"
 576 |   bottom: "conv9"
 577 |   top: "conv10/dw"
 578 |   param {
 579 |     lr_mult: 1.0
 580 |     decay_mult: 1.0
 581 |   }
 582 |   convolution_param {
 583 |     num_output: 512
 584 |     bias_term: false
 585 |     pad: 1
 586 |     kernel_size: 3
 587 |     group: 512
 588 |     engine: CAFFE
 589 |     weight_filler {
 590 |       type: "msra"
 591 |     }
 592 |     bias_filler {
 593 |       type: "constant"
 594 |       value: 0.0
 595 |     }
 596 |   }
 597 | }
 598 | layer {
         # ReLU applied in place on the depthwise output.
 599 |   name: "conv10/dw/relu"
 600 |   type: "ReLU"
 601 |   bottom: "conv10/dw"
 602 |   top: "conv10/dw"
 603 | }
 604 | layer {
         # Pointwise 1x1 convolution: 512 channels in, 512 channels out.
 605 |   name: "conv10"
 606 |   type: "Convolution"
 607 |   bottom: "conv10/dw"
 608 |   top: "conv10"
 609 |   param {
 610 |     lr_mult: 1.0
 611 |     decay_mult: 1.0
 612 |   }
 613 |   convolution_param {
 614 |     num_output: 512
 615 |     bias_term: false
 616 |     kernel_size: 1
 617 |     weight_filler {
 618 |       type: "msra"
 619 |     }
 620 |     bias_filler {
 621 |       type: "constant"
 622 |       value: 0.0
 623 |     }
 624 |   }
 625 | }
 626 | layer {
         # ReLU applied in place on the pointwise output.
 627 |   name: "conv10/relu"
 628 |   type: "ReLU"
 629 |   bottom: "conv10"
 630 |   top: "conv10"
 631 | }
 632 | layer {
         # Depthwise 3x3 stage of the conv11 unit: group == num_output == 512 (one filter
         # per channel of "conv10"), default stride. bias_term is false, so bias_filler
         # is unused.
 633 |   name: "conv11/dw"
 634 |   type: "Convolution"
 635 |   bottom: "conv10"
 636 |   top: "conv11/dw"
 637 |   param {
 638 |     lr_mult: 1.0
 639 |     decay_mult: 1.0
 640 |   }
 641 |   convolution_param {
 642 |     num_output: 512
 643 |     bias_term: false
 644 |     pad: 1
 645 |     kernel_size: 3
 646 |     group: 512
 647 |     engine: CAFFE
 648 |     weight_filler {
 649 |       type: "msra"
 650 |     }
 651 |     bias_filler {
 652 |       type: "constant"
 653 |       value: 0.0
 654 |     }
 655 |   }
 656 | }
 657 | layer {
         # ReLU applied in place on the depthwise output.
 658 |   name: "conv11/dw/relu"
 659 |   type: "ReLU"
 660 |   bottom: "conv11/dw"
 661 |   top: "conv11/dw"
 662 | }
 663 | layer {
         # Pointwise 1x1 convolution: 512 channels in, 512 channels out. The "conv11"
         # blob is also the first feature map consumed by the mbox detection heads
         # (conv11_mbox_loc / conv11_mbox_conf_new / conv11_mbox_priorbox).
 664 |   name: "conv11"
 665 |   type: "Convolution"
 666 |   bottom: "conv11/dw"
 667 |   top: "conv11"
 668 |   param {
 669 |     lr_mult: 1.0
 670 |     decay_mult: 1.0
 671 |   }
 672 |   convolution_param {
 673 |     num_output: 512
 674 |     bias_term: false
 675 |     kernel_size: 1
 676 |     weight_filler {
 677 |       type: "msra"
 678 |     }
 679 |     bias_filler {
 680 |       type: "constant"
 681 |       value: 0.0
 682 |     }
 683 |   }
 684 | }
 685 | layer {
         # ReLU applied in place on the pointwise output.
 686 |   name: "conv11/relu"
 687 |   type: "ReLU"
 688 |   bottom: "conv11"
 689 |   top: "conv11"
 690 | }
 691 | layer {
         # Depthwise 3x3 stage of the conv12 unit: group == num_output == 512 (one filter
         # per channel of "conv11"), stride 2 so this layer downsamples spatially.
         # bias_term is false, so bias_filler is unused.
 692 |   name: "conv12/dw"
 693 |   type: "Convolution"
 694 |   bottom: "conv11"
 695 |   top: "conv12/dw"
 696 |   param {
 697 |     lr_mult: 1.0
 698 |     decay_mult: 1.0
 699 |   }
 700 |   convolution_param {
 701 |     num_output: 512
 702 |     bias_term: false
 703 |     pad: 1
 704 |     kernel_size: 3
 705 |     stride: 2
 706 |     group: 512
 707 |     engine: CAFFE
 708 |     weight_filler {
 709 |       type: "msra"
 710 |     }
 711 |     bias_filler {
 712 |       type: "constant"
 713 |       value: 0.0
 714 |     }
 715 |   }
 716 | }
 717 | layer {
         # ReLU applied in place on the depthwise output.
 718 |   name: "conv12/dw/relu"
 719 |   type: "ReLU"
 720 |   bottom: "conv12/dw"
 721 |   top: "conv12/dw"
 722 | }
 723 | layer {
         # Pointwise 1x1 convolution: expands the 512 depthwise channels to 1024 channels.
 724 |   name: "conv12"
 725 |   type: "Convolution"
 726 |   bottom: "conv12/dw"
 727 |   top: "conv12"
 728 |   param {
 729 |     lr_mult: 1.0
 730 |     decay_mult: 1.0
 731 |   }
 732 |   convolution_param {
 733 |     num_output: 1024
 734 |     bias_term: false
 735 |     kernel_size: 1
 736 |     weight_filler {
 737 |       type: "msra"
 738 |     }
 739 |     bias_filler {
 740 |       type: "constant"
 741 |       value: 0.0
 742 |     }
 743 |   }
 744 | }
 745 | layer {
         # ReLU applied in place on the pointwise output.
 746 |   name: "conv12/relu"
 747 |   type: "ReLU"
 748 |   bottom: "conv12"
 749 |   top: "conv12"
 750 | }
 751 | layer {
         # Depthwise 3x3 stage of the conv13 unit: group == num_output == 1024 (one filter
         # per channel of "conv12"), default stride. bias_term is false, so bias_filler
         # is unused.
 752 |   name: "conv13/dw"
 753 |   type: "Convolution"
 754 |   bottom: "conv12"
 755 |   top: "conv13/dw"
 756 |   param {
 757 |     lr_mult: 1.0
 758 |     decay_mult: 1.0
 759 |   }
 760 |   convolution_param {
 761 |     num_output: 1024
 762 |     bias_term: false
 763 |     pad: 1
 764 |     kernel_size: 3
 765 |     group: 1024
 766 |     engine: CAFFE
 767 |     weight_filler {
 768 |       type: "msra"
 769 |     }
 770 |     bias_filler {
 771 |       type: "constant"
 772 |       value: 0.0
 773 |     }
 774 |   }
 775 | }
 776 | layer {
         # ReLU applied in place on the depthwise output.
 777 |   name: "conv13/dw/relu"
 778 |   type: "ReLU"
 779 |   bottom: "conv13/dw"
 780 |   top: "conv13/dw"
 781 | }
 782 | layer {
         # Pointwise 1x1 convolution: 1024 channels in, 1024 channels out. The "conv13"
         # blob feeds both the extra layers (conv14_1) and the conv13_mbox_* heads.
 783 |   name: "conv13"
 784 |   type: "Convolution"
 785 |   bottom: "conv13/dw"
 786 |   top: "conv13"
 787 |   param {
 788 |     lr_mult: 1.0
 789 |     decay_mult: 1.0
 790 |   }
 791 |   convolution_param {
 792 |     num_output: 1024
 793 |     bias_term: false
 794 |     kernel_size: 1
 795 |     weight_filler {
 796 |       type: "msra"
 797 |     }
 798 |     bias_filler {
 799 |       type: "constant"
 800 |       value: 0.0
 801 |     }
 802 |   }
 803 | }
 804 | layer {
         # ReLU applied in place on the pointwise output.
 805 |   name: "conv13/relu"
 806 |   type: "ReLU"
 807 |   bottom: "conv13"
 808 |   top: "conv13"
 809 | }
 810 | layer {
         # Extra feature unit, part 1: 1x1 convolution reducing the 1024-channel
         # "conv13" blob to 256 channels. bias_term is false, so bias_filler is unused.
 811 |   name: "conv14_1"
 812 |   type: "Convolution"
 813 |   bottom: "conv13"
 814 |   top: "conv14_1"
 815 |   param {
 816 |     lr_mult: 1.0
 817 |     decay_mult: 1.0
 818 |   }
 819 |   convolution_param {
 820 |     num_output: 256
 821 |     bias_term: false
 822 |     kernel_size: 1
 823 |     weight_filler {
 824 |       type: "msra"
 825 |     }
 826 |     bias_filler {
 827 |       type: "constant"
 828 |       value: 0.0
 829 |     }
 830 |   }
 831 | }
 832 | layer {
         # ReLU applied in place.
 833 |   name: "conv14_1/relu"
 834 |   type: "ReLU"
 835 |   bottom: "conv14_1"
 836 |   top: "conv14_1"
 837 | }
 838 | layer {
         # Extra feature unit, part 2: ordinary (non-grouped) 3x3 convolution, stride 2,
         # pad 1, producing 512 channels. "conv14_2" is consumed by conv15_1 and by the
         # conv14_2_mbox_* heads.
 839 |   name: "conv14_2"
 840 |   type: "Convolution"
 841 |   bottom: "conv14_1"
 842 |   top: "conv14_2"
 843 |   param {
 844 |     lr_mult: 1.0
 845 |     decay_mult: 1.0
 846 |   }
 847 |   convolution_param {
 848 |     num_output: 512
 849 |     bias_term: false
 850 |     pad: 1
 851 |     kernel_size: 3
 852 |     stride: 2
 853 |     weight_filler {
 854 |       type: "msra"
 855 |     }
 856 |     bias_filler {
 857 |       type: "constant"
 858 |       value: 0.0
 859 |     }
 860 |   }
 861 | }
 862 | layer {
         # ReLU applied in place.
 863 |   name: "conv14_2/relu"
 864 |   type: "ReLU"
 865 |   bottom: "conv14_2"
 866 |   top: "conv14_2"
 867 | }
 868 | layer {
         # Extra feature unit, part 1: 1x1 convolution reducing the 512-channel
         # "conv14_2" blob to 128 channels. bias_term is false, so bias_filler is unused.
 869 |   name: "conv15_1"
 870 |   type: "Convolution"
 871 |   bottom: "conv14_2"
 872 |   top: "conv15_1"
 873 |   param {
 874 |     lr_mult: 1.0
 875 |     decay_mult: 1.0
 876 |   }
 877 |   convolution_param {
 878 |     num_output: 128
 879 |     bias_term: false
 880 |     kernel_size: 1
 881 |     weight_filler {
 882 |       type: "msra"
 883 |     }
 884 |     bias_filler {
 885 |       type: "constant"
 886 |       value: 0.0
 887 |     }
 888 |   }
 889 | }
 890 | layer {
         # ReLU applied in place.
 891 |   name: "conv15_1/relu"
 892 |   type: "ReLU"
 893 |   bottom: "conv15_1"
 894 |   top: "conv15_1"
 895 | }
 896 | layer {
         # Extra feature unit, part 2: ordinary 3x3 convolution, stride 2, pad 1,
         # producing 256 channels. "conv15_2" is consumed by conv16_1 and by the
         # conv15_2_mbox_* heads.
 897 |   name: "conv15_2"
 898 |   type: "Convolution"
 899 |   bottom: "conv15_1"
 900 |   top: "conv15_2"
 901 |   param {
 902 |     lr_mult: 1.0
 903 |     decay_mult: 1.0
 904 |   }
 905 |   convolution_param {
 906 |     num_output: 256
 907 |     bias_term: false
 908 |     pad: 1
 909 |     kernel_size: 3
 910 |     stride: 2
 911 |     weight_filler {
 912 |       type: "msra"
 913 |     }
 914 |     bias_filler {
 915 |       type: "constant"
 916 |       value: 0.0
 917 |     }
 918 |   }
 919 | }
 920 | layer {
         # ReLU applied in place.
 921 |   name: "conv15_2/relu"
 922 |   type: "ReLU"
 923 |   bottom: "conv15_2"
 924 |   top: "conv15_2"
 925 | }
 926 | layer {
         # Extra feature unit, part 1: 1x1 convolution reducing the 256-channel
         # "conv15_2" blob to 128 channels. bias_term is false, so bias_filler is unused.
 927 |   name: "conv16_1"
 928 |   type: "Convolution"
 929 |   bottom: "conv15_2"
 930 |   top: "conv16_1"
 931 |   param {
 932 |     lr_mult: 1.0
 933 |     decay_mult: 1.0
 934 |   }
 935 |   convolution_param {
 936 |     num_output: 128
 937 |     bias_term: false
 938 |     kernel_size: 1
 939 |     weight_filler {
 940 |       type: "msra"
 941 |     }
 942 |     bias_filler {
 943 |       type: "constant"
 944 |       value: 0.0
 945 |     }
 946 |   }
 947 | }
 948 | layer {
         # ReLU applied in place.
 949 |   name: "conv16_1/relu"
 950 |   type: "ReLU"
 951 |   bottom: "conv16_1"
 952 |   top: "conv16_1"
 953 | }
 954 | layer {
         # Extra feature unit, part 2: ordinary 3x3 convolution, stride 2, pad 1,
         # producing 256 channels. "conv16_2" is consumed by conv17_1 and by the
         # conv16_2_mbox_* heads.
 955 |   name: "conv16_2"
 956 |   type: "Convolution"
 957 |   bottom: "conv16_1"
 958 |   top: "conv16_2"
 959 |   param {
 960 |     lr_mult: 1.0
 961 |     decay_mult: 1.0
 962 |   }
 963 |   convolution_param {
 964 |     num_output: 256
 965 |     bias_term: false
 966 |     pad: 1
 967 |     kernel_size: 3
 968 |     stride: 2
 969 |     weight_filler {
 970 |       type: "msra"
 971 |     }
 972 |     bias_filler {
 973 |       type: "constant"
 974 |       value: 0.0
 975 |     }
 976 |   }
 977 | }
 978 | layer {
         # ReLU applied in place.
 979 |   name: "conv16_2/relu"
 980 |   type: "ReLU"
 981 |   bottom: "conv16_2"
 982 |   top: "conv16_2"
 983 | }
 984 | layer {
         # Extra feature unit, part 1: 1x1 convolution reducing the 256-channel
         # "conv16_2" blob to 64 channels. bias_term is false, so bias_filler is unused.
 985 |   name: "conv17_1"
 986 |   type: "Convolution"
 987 |   bottom: "conv16_2"
 988 |   top: "conv17_1"
 989 |   param {
 990 |     lr_mult: 1.0
 991 |     decay_mult: 1.0
 992 |   }
 993 |   convolution_param {
 994 |     num_output: 64
 995 |     bias_term: false
 996 |     kernel_size: 1
 997 |     weight_filler {
 998 |       type: "msra"
 999 |     }
1000 |     bias_filler {
1001 |       type: "constant"
1002 |       value: 0.0
1003 |     }
1004 |   }
1005 | }
1006 | layer {
         # ReLU applied in place.
1007 |   name: "conv17_1/relu"
1008 |   type: "ReLU"
1009 |   bottom: "conv17_1"
1010 |   top: "conv17_1"
1011 | }
1012 | layer {
         # Extra feature unit, part 2: ordinary 3x3 convolution, stride 2, pad 1,
         # producing 128 channels.
1013 |   name: "conv17_2"
1014 |   type: "Convolution"
1015 |   bottom: "conv17_1"
1016 |   top: "conv17_2"
1017 |   param {
1018 |     lr_mult: 1.0
1019 |     decay_mult: 1.0
1020 |   }
1021 |   convolution_param {
1022 |     num_output: 128
1023 |     bias_term: false
1024 |     pad: 1
1025 |     kernel_size: 3
1026 |     stride: 2
1027 |     weight_filler {
1028 |       type: "msra"
1029 |     }
1030 |     bias_filler {
1031 |       type: "constant"
1032 |       value: 0.0
1033 |     }
1034 |   }
1035 | }
1036 | layer {
         # ReLU applied in place.
1037 |   name: "conv17_2/relu"
1038 |   type: "ReLU"
1039 |   bottom: "conv17_2"
1040 |   top: "conv17_2"
1041 | }
1042 | layer {
         # Box-regression head on "conv11": 1x1 convolution with 12 outputs, i.e.
         # 4 box offsets for each of the 3 priors per location that
         # conv11_mbox_priorbox generates. Unlike the backbone convolutions this layer
         # keeps its bias: the second param block (lr_mult 2, decay_mult 0) applies to it.
1043 |   name: "conv11_mbox_loc"
1044 |   type: "Convolution"
1045 |   bottom: "conv11"
1046 |   top: "conv11_mbox_loc"
1047 |   param {
1048 |     lr_mult: 1.0
1049 |     decay_mult: 1.0
1050 |   }
1051 |   param {
1052 |     lr_mult: 2.0
1053 |     decay_mult: 0.0
1054 |   }
1055 |   convolution_param {
1056 |     num_output: 12
1057 |     kernel_size: 1
1058 |     weight_filler {
1059 |       type: "msra"
1060 |     }
1061 |     bias_filler {
1062 |       type: "constant"
1063 |       value: 0.0
1064 |     }
1065 |   }
1066 | }
1067 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last so the values that
         # belong to one spatial location are contiguous before flattening.
1068 |   name: "conv11_mbox_loc_perm"
1069 |   type: "Permute"
1070 |   bottom: "conv11_mbox_loc"
1071 |   top: "conv11_mbox_loc_perm"
1072 |   permute_param {
1073 |     order: 0
1074 |     order: 2
1075 |     order: 3
1076 |     order: 1
1077 |   }
1078 | }
1079 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1080 |   name: "conv11_mbox_loc_flat"
1081 |   type: "Flatten"
1082 |   bottom: "conv11_mbox_loc_perm"
1083 |   top: "conv11_mbox_loc_flat"
1084 |   flatten_param {
1085 |     axis: 1
1086 |   }
1087 | }
1088 | layer {
         # Class-score head on "conv11": 1x1 convolution with 18 outputs. With the
         # 3 priors per location of conv11_mbox_priorbox this is consistent with
         # 6 classes per prior -- confirm against num_classes in this file's
         # DetectionOutput layer.
         # NOTE(review): the layer name carries a "_new" suffix while the top blob keeps
         # the plain name; presumably this prevents weights of a pretrained conf layer
         # with a different class count from being loaded by name -- confirm.
1089 |   name: "conv11_mbox_conf_new"
1090 |   type: "Convolution"
1091 |   bottom: "conv11"
1092 |   top: "conv11_mbox_conf"
1093 |   param {
1094 |     lr_mult: 1.0
1095 |     decay_mult: 1.0
1096 |   }
1097 |   param {
1098 |     lr_mult: 2.0
1099 |     decay_mult: 0.0
1100 |   }
1101 |   convolution_param {
1102 |     num_output: 18
1103 |     kernel_size: 1
1104 |     weight_filler {
1105 |       type: "msra"
1106 |     }
1107 |     bias_filler {
1108 |       type: "constant"
1109 |       value: 0.0
1110 |     }
1111 |   }
1112 | }
1113 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last so the scores that
         # belong to one spatial location are contiguous before flattening.
1114 |   name: "conv11_mbox_conf_perm"
1115 |   type: "Permute"
1116 |   bottom: "conv11_mbox_conf"
1117 |   top: "conv11_mbox_conf_perm"
1118 |   permute_param {
1119 |     order: 0
1120 |     order: 2
1121 |     order: 3
1122 |     order: 1
1123 |   }
1124 | }
1125 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1126 |   name: "conv11_mbox_conf_flat"
1127 |   type: "Flatten"
1128 |   bottom: "conv11_mbox_conf_perm"
1129 |   top: "conv11_mbox_conf_flat"
1130 |   flatten_param {
1131 |     axis: 1
1132 |   }
1133 | }
1134 | layer {
         # Prior (anchor) box generator for the "conv11" feature map; "data" is passed
         # as the second bottom so the layer can relate feature-map cells to the image.
         # min_size 30 with aspect ratio 2 and flip enabled gives 3 priors per location
         # (ratios 1, 2 and 1/2), matching the 12 = 3 x 4 outputs of conv11_mbox_loc.
         # No max_size is set for this map. Priors are not clipped; the four variance
         # values (0.1, 0.1, 0.2, 0.2) are emitted with the priors for box decoding.
1135 |   name: "conv11_mbox_priorbox"
1136 |   type: "PriorBox"
1137 |   bottom: "conv11"
1138 |   bottom: "data"
1139 |   top: "conv11_mbox_priorbox"
1140 |   prior_box_param {
1141 |     min_size: 30.0
1142 |     aspect_ratio: 2.0
1143 |     flip: true
1144 |     clip: false
1145 |     variance: 0.1
1146 |     variance: 0.1
1147 |     variance: 0.2
1148 |     variance: 0.2
1149 |     offset: 0.5
1150 |   }
1151 | }
1152 | layer {
         # Box-regression head on "conv13": 1x1 convolution with 24 outputs, i.e.
         # 4 box offsets for each of the 6 priors per location that
         # conv13_mbox_priorbox generates. The layer has a bias; the second param block
         # (lr_mult 2, decay_mult 0) applies to it.
1153 |   name: "conv13_mbox_loc"
1154 |   type: "Convolution"
1155 |   bottom: "conv13"
1156 |   top: "conv13_mbox_loc"
1157 |   param {
1158 |     lr_mult: 1.0
1159 |     decay_mult: 1.0
1160 |   }
1161 |   param {
1162 |     lr_mult: 2.0
1163 |     decay_mult: 0.0
1164 |   }
1165 |   convolution_param {
1166 |     num_output: 24
1167 |     kernel_size: 1
1168 |     weight_filler {
1169 |       type: "msra"
1170 |     }
1171 |     bias_filler {
1172 |       type: "constant"
1173 |       value: 0.0
1174 |     }
1175 |   }
1176 | }
1177 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1178 |   name: "conv13_mbox_loc_perm"
1179 |   type: "Permute"
1180 |   bottom: "conv13_mbox_loc"
1181 |   top: "conv13_mbox_loc_perm"
1182 |   permute_param {
1183 |     order: 0
1184 |     order: 2
1185 |     order: 3
1186 |     order: 1
1187 |   }
1188 | }
1189 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1190 |   name: "conv13_mbox_loc_flat"
1191 |   type: "Flatten"
1192 |   bottom: "conv13_mbox_loc_perm"
1193 |   top: "conv13_mbox_loc_flat"
1194 |   flatten_param {
1195 |     axis: 1
1196 |   }
1197 | }
1198 | layer {
         # Class-score head on "conv13": 1x1 convolution with 36 outputs. With the
         # 6 priors per location of conv13_mbox_priorbox this is consistent with
         # 6 classes per prior -- confirm against num_classes in this file's
         # DetectionOutput layer.
         # NOTE(review): layer name has a "_new" suffix while the top blob does not;
         # presumably to avoid loading pretrained conf weights by name -- confirm.
1199 |   name: "conv13_mbox_conf_new"
1200 |   type: "Convolution"
1201 |   bottom: "conv13"
1202 |   top: "conv13_mbox_conf"
1203 |   param {
1204 |     lr_mult: 1.0
1205 |     decay_mult: 1.0
1206 |   }
1207 |   param {
1208 |     lr_mult: 2.0
1209 |     decay_mult: 0.0
1210 |   }
1211 |   convolution_param {
1212 |     num_output: 36
1213 |     kernel_size: 1
1214 |     weight_filler {
1215 |       type: "msra"
1216 |     }
1217 |     bias_filler {
1218 |       type: "constant"
1219 |       value: 0.0
1220 |     }
1221 |   }
1222 | }
1223 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1224 |   name: "conv13_mbox_conf_perm"
1225 |   type: "Permute"
1226 |   bottom: "conv13_mbox_conf"
1227 |   top: "conv13_mbox_conf_perm"
1228 |   permute_param {
1229 |     order: 0
1230 |     order: 2
1231 |     order: 3
1232 |     order: 1
1233 |   }
1234 | }
1235 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1236 |   name: "conv13_mbox_conf_flat"
1237 |   type: "Flatten"
1238 |   bottom: "conv13_mbox_conf_perm"
1239 |   top: "conv13_mbox_conf_flat"
1240 |   flatten_param {
1241 |     axis: 1
1242 |   }
1243 | }
1244 | layer {
         # Prior (anchor) box generator for the "conv13" feature map; "data" is the
         # second bottom so the layer can relate feature-map cells to the image.
         # min_size 60 / max_size 100 with aspect ratios 2 and 3 and flip enabled gives
         # 6 priors per location (ratios 1, 2, 1/2, 3, 1/3 at min_size plus one extra
         # prior derived from min_size and max_size), matching the 24 = 6 x 4 outputs
         # of conv13_mbox_loc. Priors are not clipped.
1245 |   name: "conv13_mbox_priorbox"
1246 |   type: "PriorBox"
1247 |   bottom: "conv13"
1248 |   bottom: "data"
1249 |   top: "conv13_mbox_priorbox"
1250 |   prior_box_param {
1251 |     min_size: 60.0
1252 |     max_size: 100.0
1253 |     aspect_ratio: 2.0
1254 |     aspect_ratio: 3.0
1255 |     flip: true
1256 |     clip: false
1257 |     variance: 0.1
1258 |     variance: 0.1
1259 |     variance: 0.2
1260 |     variance: 0.2
1261 |     offset: 0.5
1262 |   }
1263 | }
1264 | layer {
         # Box-regression head on "conv14_2": 1x1 convolution with 24 outputs, i.e.
         # 4 box offsets for each of the 6 priors per location that
         # conv14_2_mbox_priorbox generates. The layer has a bias; the second param
         # block (lr_mult 2, decay_mult 0) applies to it.
1265 |   name: "conv14_2_mbox_loc"
1266 |   type: "Convolution"
1267 |   bottom: "conv14_2"
1268 |   top: "conv14_2_mbox_loc"
1269 |   param {
1270 |     lr_mult: 1.0
1271 |     decay_mult: 1.0
1272 |   }
1273 |   param {
1274 |     lr_mult: 2.0
1275 |     decay_mult: 0.0
1276 |   }
1277 |   convolution_param {
1278 |     num_output: 24
1279 |     kernel_size: 1
1280 |     weight_filler {
1281 |       type: "msra"
1282 |     }
1283 |     bias_filler {
1284 |       type: "constant"
1285 |       value: 0.0
1286 |     }
1287 |   }
1288 | }
1289 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1290 |   name: "conv14_2_mbox_loc_perm"
1291 |   type: "Permute"
1292 |   bottom: "conv14_2_mbox_loc"
1293 |   top: "conv14_2_mbox_loc_perm"
1294 |   permute_param {
1295 |     order: 0
1296 |     order: 2
1297 |     order: 3
1298 |     order: 1
1299 |   }
1300 | }
1301 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1302 |   name: "conv14_2_mbox_loc_flat"
1303 |   type: "Flatten"
1304 |   bottom: "conv14_2_mbox_loc_perm"
1305 |   top: "conv14_2_mbox_loc_flat"
1306 |   flatten_param {
1307 |     axis: 1
1308 |   }
1309 | }
1310 | layer {
         # Class-score head on "conv14_2": 1x1 convolution with 36 outputs. With the
         # 6 priors per location of conv14_2_mbox_priorbox this is consistent with
         # 6 classes per prior -- confirm against num_classes in this file's
         # DetectionOutput layer.
         # NOTE(review): layer name has a "_new" suffix while the top blob does not;
         # presumably to avoid loading pretrained conf weights by name -- confirm.
1311 |   name: "conv14_2_mbox_conf_new"
1312 |   type: "Convolution"
1313 |   bottom: "conv14_2"
1314 |   top: "conv14_2_mbox_conf"
1315 |   param {
1316 |     lr_mult: 1.0
1317 |     decay_mult: 1.0
1318 |   }
1319 |   param {
1320 |     lr_mult: 2.0
1321 |     decay_mult: 0.0
1322 |   }
1323 |   convolution_param {
1324 |     num_output: 36
1325 |     kernel_size: 1
1326 |     weight_filler {
1327 |       type: "msra"
1328 |     }
1329 |     bias_filler {
1330 |       type: "constant"
1331 |       value: 0.0
1332 |     }
1333 |   }
1334 | }
1335 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1336 |   name: "conv14_2_mbox_conf_perm"
1337 |   type: "Permute"
1338 |   bottom: "conv14_2_mbox_conf"
1339 |   top: "conv14_2_mbox_conf_perm"
1340 |   permute_param {
1341 |     order: 0
1342 |     order: 2
1343 |     order: 3
1344 |     order: 1
1345 |   }
1346 | }
1347 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1348 |   name: "conv14_2_mbox_conf_flat"
1349 |   type: "Flatten"
1350 |   bottom: "conv14_2_mbox_conf_perm"
1351 |   top: "conv14_2_mbox_conf_flat"
1352 |   flatten_param {
1353 |     axis: 1
1354 |   }
1355 | }
1356 | layer {
         # Prior (anchor) box generator for the "conv14_2" feature map; "data" is the
         # second bottom so the layer can relate feature-map cells to the image.
         # min_size 100 / max_size 140 with aspect ratios 2 and 3 and flip enabled gives
         # 6 priors per location, matching the 24 = 6 x 4 outputs of conv14_2_mbox_loc.
         # Priors are not clipped.
1357 |   name: "conv14_2_mbox_priorbox"
1358 |   type: "PriorBox"
1359 |   bottom: "conv14_2"
1360 |   bottom: "data"
1361 |   top: "conv14_2_mbox_priorbox"
1362 |   prior_box_param {
1363 |     min_size: 100.0
1364 |     max_size: 140.0
1365 |     aspect_ratio: 2.0
1366 |     aspect_ratio: 3.0
1367 |     flip: true
1368 |     clip: false
1369 |     variance: 0.1
1370 |     variance: 0.1
1371 |     variance: 0.2
1372 |     variance: 0.2
1373 |     offset: 0.5
1374 |   }
1375 | }
1376 | layer {
         # Box-regression head on "conv15_2": 1x1 convolution with 24 outputs, i.e.
         # 4 box offsets for each of the 6 priors per location that
         # conv15_2_mbox_priorbox generates. The layer has a bias; the second param
         # block (lr_mult 2, decay_mult 0) applies to it.
1377 |   name: "conv15_2_mbox_loc"
1378 |   type: "Convolution"
1379 |   bottom: "conv15_2"
1380 |   top: "conv15_2_mbox_loc"
1381 |   param {
1382 |     lr_mult: 1.0
1383 |     decay_mult: 1.0
1384 |   }
1385 |   param {
1386 |     lr_mult: 2.0
1387 |     decay_mult: 0.0
1388 |   }
1389 |   convolution_param {
1390 |     num_output: 24
1391 |     kernel_size: 1
1392 |     weight_filler {
1393 |       type: "msra"
1394 |     }
1395 |     bias_filler {
1396 |       type: "constant"
1397 |       value: 0.0
1398 |     }
1399 |   }
1400 | }
1401 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1402 |   name: "conv15_2_mbox_loc_perm"
1403 |   type: "Permute"
1404 |   bottom: "conv15_2_mbox_loc"
1405 |   top: "conv15_2_mbox_loc_perm"
1406 |   permute_param {
1407 |     order: 0
1408 |     order: 2
1409 |     order: 3
1410 |     order: 1
1411 |   }
1412 | }
1413 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1414 |   name: "conv15_2_mbox_loc_flat"
1415 |   type: "Flatten"
1416 |   bottom: "conv15_2_mbox_loc_perm"
1417 |   top: "conv15_2_mbox_loc_flat"
1418 |   flatten_param {
1419 |     axis: 1
1420 |   }
1421 | }
1422 | layer {
         # Class-score head on "conv15_2": 1x1 convolution with 36 outputs. With the
         # 6 priors per location of conv15_2_mbox_priorbox this is consistent with
         # 6 classes per prior -- confirm against num_classes in this file's
         # DetectionOutput layer.
         # NOTE(review): layer name has a "_new" suffix while the top blob does not;
         # presumably to avoid loading pretrained conf weights by name -- confirm.
1423 |   name: "conv15_2_mbox_conf_new"
1424 |   type: "Convolution"
1425 |   bottom: "conv15_2"
1426 |   top: "conv15_2_mbox_conf"
1427 |   param {
1428 |     lr_mult: 1.0
1429 |     decay_mult: 1.0
1430 |   }
1431 |   param {
1432 |     lr_mult: 2.0
1433 |     decay_mult: 0.0
1434 |   }
1435 |   convolution_param {
1436 |     num_output: 36
1437 |     kernel_size: 1
1438 |     weight_filler {
1439 |       type: "msra"
1440 |     }
1441 |     bias_filler {
1442 |       type: "constant"
1443 |       value: 0.0
1444 |     }
1445 |   }
1446 | }
1447 | layer {
         # Reorders axes (0, 2, 3, 1): moves the channel axis last before flattening.
1448 |   name: "conv15_2_mbox_conf_perm"
1449 |   type: "Permute"
1450 |   bottom: "conv15_2_mbox_conf"
1451 |   top: "conv15_2_mbox_conf_perm"
1452 |   permute_param {
1453 |     order: 0
1454 |     order: 2
1455 |     order: 3
1456 |     order: 1
1457 |   }
1458 | }
1459 | layer {
         # Flattens everything from axis 1 onward into a single vector per batch item.
1460 |   name: "conv15_2_mbox_conf_flat"
1461 |   type: "Flatten"
1462 |   bottom: "conv15_2_mbox_conf_perm"
1463 |   top: "conv15_2_mbox_conf_flat"
1464 |   flatten_param {
1465 |     axis: 1
1466 |   }
1467 | }
1468 | layer {
         # Prior (anchor) box generator for the "conv15_2" feature map; "data" is the
         # second bottom so the layer can relate feature-map cells to the image.
         # min_size 140 / max_size 200 with aspect ratios 2 and 3 and flip enabled gives
         # 6 priors per location, matching the 24 = 6 x 4 outputs of conv15_2_mbox_loc.
         # Priors are not clipped.
1469 |   name: "conv15_2_mbox_priorbox"
1470 |   type: "PriorBox"
1471 |   bottom: "conv15_2"
1472 |   bottom: "data"
1473 |   top: "conv15_2_mbox_priorbox"
1474 |   prior_box_param {
1475 |     min_size: 140.0
1476 |     max_size: 200.0
1477 |     aspect_ratio: 2.0
1478 |     aspect_ratio: 3.0
1479 |     flip: true
1480 |     clip: false
1481 |     variance: 0.1
1482 |     variance: 0.1
1483 |     variance: 0.2
1484 |     variance: 0.2
1485 |     offset: 0.5
1486 |   }
1487 | }
1488 | layer {
1489 |   name: "conv16_2_mbox_loc"
1490 |   type: "Convolution"
1491 |   bottom: "conv16_2"
1492 |   top: "conv16_2_mbox_loc"
1493 |   param {
1494 |     lr_mult: 1.0
1495 |     decay_mult: 1.0
1496 |   }
1497 |   param {
1498 |     lr_mult: 2.0
1499 |     decay_mult: 0.0
1500 |   }
1501 |   convolution_param {
1502 |     num_output: 24
1503 |     kernel_size: 1
1504 |     weight_filler {
1505 |       type: "msra"
1506 |     }
1507 |     bias_filler {
1508 |       type: "constant"
1509 |       value: 0.0
1510 |     }
1511 |   }
1512 | }
1513 | layer {
1514 |   name: "conv16_2_mbox_loc_perm"
1515 |   type: "Permute"
1516 |   bottom: "conv16_2_mbox_loc"
1517 |   top: "conv16_2_mbox_loc_perm"
1518 |   permute_param {
1519 |     order: 0
1520 |     order: 2
1521 |     order: 3
1522 |     order: 1
1523 |   }
1524 | }
1525 | layer {
1526 |   name: "conv16_2_mbox_loc_flat"
1527 |   type: "Flatten"
1528 |   bottom: "conv16_2_mbox_loc_perm"
1529 |   top: "conv16_2_mbox_loc_flat"
1530 |   flatten_param {
1531 |     axis: 1
1532 |   }
1533 | }
1534 | layer {
1535 |   name: "conv16_2_mbox_conf_new"
1536 |   type: "Convolution"
1537 |   bottom: "conv16_2"
1538 |   top: "conv16_2_mbox_conf"
1539 |   param {
1540 |     lr_mult: 1.0
1541 |     decay_mult: 1.0
1542 |   }
1543 |   param {
1544 |     lr_mult: 2.0
1545 |     decay_mult: 0.0
1546 |   }
1547 |   convolution_param {
1548 |     num_output: 36
1549 |     kernel_size: 1
1550 |     weight_filler {
1551 |       type: "msra"
1552 |     }
1553 |     bias_filler {
1554 |       type: "constant"
1555 |       value: 0.0
1556 |     }
1557 |   }
1558 | }
1559 | layer {
1560 |   name: "conv16_2_mbox_conf_perm"
1561 |   type: "Permute"
1562 |   bottom: "conv16_2_mbox_conf"
1563 |   top: "conv16_2_mbox_conf_perm"
1564 |   permute_param {
1565 |     order: 0
1566 |     order: 2
1567 |     order: 3
1568 |     order: 1
1569 |   }
1570 | }
1571 | layer {
1572 |   name: "conv16_2_mbox_conf_flat"
1573 |   type: "Flatten"
1574 |   bottom: "conv16_2_mbox_conf_perm"
1575 |   top: "conv16_2_mbox_conf_flat"
1576 |   flatten_param {
1577 |     axis: 1
1578 |   }
1579 | }
1580 | layer {
1581 |   name: "conv16_2_mbox_priorbox"
1582 |   type: "PriorBox"
1583 |   bottom: "conv16_2"
1584 |   bottom: "data"
1585 |   top: "conv16_2_mbox_priorbox"
1586 |   prior_box_param {
1587 |     min_size: 200.0
1588 |     max_size: 240.0
1589 |     aspect_ratio: 2.0
1590 |     aspect_ratio: 3.0
1591 |     flip: true
1592 |     clip: false
1593 |     variance: 0.1
1594 |     variance: 0.1
1595 |     variance: 0.2
1596 |     variance: 0.2
1597 |     offset: 0.5
1598 |   }
1599 | }
1600 | layer {
1601 |   name: "conv17_2_mbox_loc"
1602 |   type: "Convolution"
1603 |   bottom: "conv17_2"
1604 |   top: "conv17_2_mbox_loc"
1605 |   param {
1606 |     lr_mult: 1.0
1607 |     decay_mult: 1.0
1608 |   }
1609 |   param {
1610 |     lr_mult: 2.0
1611 |     decay_mult: 0.0
1612 |   }
1613 |   convolution_param {
1614 |     num_output: 24
1615 |     kernel_size: 1
1616 |     weight_filler {
1617 |       type: "msra"
1618 |     }
1619 |     bias_filler {
1620 |       type: "constant"
1621 |       value: 0.0
1622 |     }
1623 |   }
1624 | }
1625 | layer {
1626 |   name: "conv17_2_mbox_loc_perm"
1627 |   type: "Permute"
1628 |   bottom: "conv17_2_mbox_loc"
1629 |   top: "conv17_2_mbox_loc_perm"
1630 |   permute_param {
1631 |     order: 0
1632 |     order: 2
1633 |     order: 3
1634 |     order: 1
1635 |   }
1636 | }
1637 | layer {
1638 |   name: "conv17_2_mbox_loc_flat"
1639 |   type: "Flatten"
1640 |   bottom: "conv17_2_mbox_loc_perm"
1641 |   top: "conv17_2_mbox_loc_flat"
1642 |   flatten_param {
1643 |     axis: 1
1644 |   }
1645 | }
1646 | layer {
1647 |   name: "conv17_2_mbox_conf_new"
1648 |   type: "Convolution"
1649 |   bottom: "conv17_2"
1650 |   top: "conv17_2_mbox_conf"
1651 |   param {
1652 |     lr_mult: 1.0
1653 |     decay_mult: 1.0
1654 |   }
1655 |   param {
1656 |     lr_mult: 2.0
1657 |     decay_mult: 0.0
1658 |   }
1659 |   convolution_param {
1660 |     num_output: 36
1661 |     kernel_size: 1
1662 |     weight_filler {
1663 |       type: "msra"
1664 |     }
1665 |     bias_filler {
1666 |       type: "constant"
1667 |       value: 0.0
1668 |     }
1669 |   }
1670 | }
1671 | layer {
1672 |   name: "conv17_2_mbox_conf_perm"
1673 |   type: "Permute"
1674 |   bottom: "conv17_2_mbox_conf"
1675 |   top: "conv17_2_mbox_conf_perm"
1676 |   permute_param {
1677 |     order: 0
1678 |     order: 2
1679 |     order: 3
1680 |     order: 1
1681 |   }
1682 | }
1683 | layer {
1684 |   name: "conv17_2_mbox_conf_flat"
1685 |   type: "Flatten"
1686 |   bottom: "conv17_2_mbox_conf_perm"
1687 |   top: "conv17_2_mbox_conf_flat"
1688 |   flatten_param {
1689 |     axis: 1
1690 |   }
1691 | }
1692 | layer {
1693 |   name: "conv17_2_mbox_priorbox"
1694 |   type: "PriorBox"
1695 |   bottom: "conv17_2"
1696 |   bottom: "data"
1697 |   top: "conv17_2_mbox_priorbox"
1698 |   prior_box_param {
1699 |     min_size: 240.0
1700 |     max_size: 300.0
1701 |     aspect_ratio: 2.0
1702 |     aspect_ratio: 3.0
1703 |     flip: true
1704 |     clip: false
1705 |     variance: 0.1
1706 |     variance: 0.1
1707 |     variance: 0.2
1708 |     variance: 0.2
1709 |     offset: 0.5
1710 |   }
1711 | }
1712 | layer {
1713 |   name: "mbox_loc"
1714 |   type: "Concat"
1715 |   bottom: "conv11_mbox_loc_flat"
1716 |   bottom: "conv13_mbox_loc_flat"
1717 |   bottom: "conv14_2_mbox_loc_flat"
1718 |   bottom: "conv15_2_mbox_loc_flat"
1719 |   bottom: "conv16_2_mbox_loc_flat"
1720 |   bottom: "conv17_2_mbox_loc_flat"
1721 |   top: "mbox_loc"
1722 |   concat_param {
1723 |     axis: 1
1724 |   }
1725 | }
1726 | layer {
1727 |   name: "mbox_conf"
1728 |   type: "Concat"
1729 |   bottom: "conv11_mbox_conf_flat"
1730 |   bottom: "conv13_mbox_conf_flat"
1731 |   bottom: "conv14_2_mbox_conf_flat"
1732 |   bottom: "conv15_2_mbox_conf_flat"
1733 |   bottom: "conv16_2_mbox_conf_flat"
1734 |   bottom: "conv17_2_mbox_conf_flat"
1735 |   top: "mbox_conf"
1736 |   concat_param {
1737 |     axis: 1
1738 |   }
1739 | }
1740 | layer {
1741 |   name: "mbox_priorbox"
1742 |   type: "Concat"
1743 |   bottom: "conv11_mbox_priorbox"
1744 |   bottom: "conv13_mbox_priorbox"
1745 |   bottom: "conv14_2_mbox_priorbox"
1746 |   bottom: "conv15_2_mbox_priorbox"
1747 |   bottom: "conv16_2_mbox_priorbox"
1748 |   bottom: "conv17_2_mbox_priorbox"
1749 |   top: "mbox_priorbox"
1750 |   concat_param {
1751 |     axis: 2
1752 |   }
1753 | }
1754 | layer {
1755 |   type: "Reshape"  # regroups the concatenated confidences into per-prior class vectors
1756 |   name: "mbox_conf_reshape"
1757 |   bottom: "mbox_conf"
1758 |   top: "mbox_conf_reshape"
1759 |   reshape_param {
1760 |     shape {
1761 |       dim: 0   # keep the first (batch) dimension of the bottom blob
1762 |       dim: -1  # inferred from the remaining element count
1763 |       dim: 6   # one score per class; same value as num_classes in detection_out
1764 |     }
1765 |   }
1766 | }
1767 | layer {
1768 |   name: "mbox_conf_softmax"
1769 |   type: "Softmax"
1770 |   bottom: "mbox_conf_reshape"
1771 |   top: "mbox_conf_softmax"
1772 |   softmax_param {
1773 |     axis: 2
1774 |   }
1775 | }
1776 | layer {
1777 |   name: "mbox_conf_flatten"
1778 |   type: "Flatten"
1779 |   bottom: "mbox_conf_softmax"
1780 |   top: "mbox_conf_flatten"
1781 |   flatten_param {
1782 |     axis: 1
1783 |   }
1784 | }
1785 | layer {
1786 |   type: "DetectionOutput"  # decodes boxes against the priors and applies NMS
1787 |   name: "detection_out"
1788 |   bottom: "mbox_loc"           # concatenated box regressions
1789 |   bottom: "mbox_conf_flatten"  # flattened softmax class scores
1790 |   bottom: "mbox_priorbox"      # concatenated prior boxes and variances
1791 |   top: "detection_out"
1792 |   include {
1793 |     phase: TEST  # layer is only instantiated in the TEST phase
1794 |   }
1795 |   detection_output_param {
1796 |     num_classes: 6  # same value as the last dim of mbox_conf_reshape
1797 |     background_label_id: 0
1798 |     share_location: true
1799 |     code_type: CENTER_SIZE
1800 |     confidence_threshold: 0.25  # detections scoring below this are discarded
1801 |     nms_param {
1802 |       top_k: 100
1803 |       nms_threshold: 0.45
1804 |     }
1805 |     keep_top_k: 100  # cap on detections kept per image after NMS
1806 |   }
1807 | }
1808 | 


--------------------------------------------------------------------------------
/SSD/MobileNet/solver.prototxt:
--------------------------------------------------------------------------------
 1 | type: "RMSProp"  # solver algorithm
 2 | solver_mode: CPU
 3 | train_net: "models/MobileNet/train.prototxt"
 4 | test_net: "models/MobileNet/test.prototxt"
 5 | test_iter: 673  # forward passes per test run
 6 | test_interval: 10000  # run the test net every 10000 training iterations
 7 | test_initialization: false  # skip the test pass before the first iteration
 8 | eval_type: "detection"
 9 | ap_version: "11point"
10 | base_lr: 0.0005
11 | lr_policy: "multistep"  # lr is multiplied by gamma at each stepvalue
12 | gamma: 0.5
13 | stepvalue: 20000
14 | stepvalue: 40000
15 | weight_decay: 0.00005
16 | iter_size: 1
17 | max_iter: 120000
18 | display: 10  # log every 10 iterations
19 | average_loss: 10  # displayed loss is averaged over the last 10 iterations
20 | debug_info: false
21 | snapshot: 1000  # write a snapshot every 1000 iterations
22 | snapshot_prefix: "models/MobileNet/MobileNetSSD_deploy"
23 | snapshot_after_train: true


--------------------------------------------------------------------------------
/SSD/MobileNet/solver_test.prototxt:
--------------------------------------------------------------------------------
 1 | type: "RMSProp"
 2 | solver_mode: CPU
 3 | train_net: "models/MobileNet/train.prototxt"
 4 | test_net: "models/MobileNet/test.prototxt"
 5 | test_iter: 673  # forward passes per test run
 6 | test_interval: 10000
 7 | test_initialization: true  # run the test net once before any training iteration
 8 | eval_type: "detection"
 9 | ap_version: "11point"
10 | base_lr: 0.0005
11 | lr_policy: "multistep"
12 | gamma: 0.5
13 | stepvalue: 20000
14 | stepvalue: 40000
15 | weight_decay: 0.00005
16 | iter_size: 1
17 | max_iter: 0  # no training iterations; presumably an evaluation-only solver - confirm
18 | display: 10
19 | average_loss: 10
20 | debug_info: false
21 | snapshot: 0  # periodic snapshots disabled
22 | snapshot_prefix: "snapshot/mobilenet"
23 | snapshot_after_train: false


--------------------------------------------------------------------------------
/SSD/MobileNet_V2/solver.prototxt:
--------------------------------------------------------------------------------
 1 | type: "RMSProp"  # solver algorithm
 2 | solver_mode: GPU
 3 | train_net: "models/MobileNetV2/train_voc.prototxt"
 4 | #test_net: "models/MobileNetV2/test.prototxt"
 5 | #test_iter: 673
 6 | #test_interval: 10000
 7 | test_initialization: false
 8 | eval_type: "detection"
 9 | ap_version: "11point"
10 | base_lr: 0.0005
11 | lr_policy: "multistep"  # lr is multiplied by gamma at each stepvalue
12 | gamma: 0.5
13 | stepvalue: 8000
14 | stepvalue: 16000
15 | stepvalue: 32000
16 | weight_decay: 0.000004
17 | iter_size: 1
18 | max_iter: 50000
19 | display: 10  # log every 10 iterations
20 | average_loss: 10  # displayed loss is averaged over the last 10 iterations
21 | debug_info: false
22 | snapshot: 1000  # write a snapshot every 1000 iterations
23 | snapshot_prefix: "models/MobileNetV2/MobileNetSSD_deploy"
24 | snapshot_after_train: true


--------------------------------------------------------------------------------
/YOLO/voc.data:
--------------------------------------------------------------------------------
1 | classes= 5
2 | train  = data/voc/2017_trainval.txt
3 | valid  = data/voc/2017_test.txt
4 | names = data/voc.names
5 | backup = backup
6 | 
7 | 


--------------------------------------------------------------------------------
/YOLO/voc.names:
--------------------------------------------------------------------------------
1 | bicycle
2 | car
3 | motorbike
4 | person
5 | cones


--------------------------------------------------------------------------------
/YOLO/yolov3-tiny.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training
  6 |  batch=32
  7 |  subdivisions=4
  8 | width=416
  9 | height=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 50020
 21 | policy=steps
 22 | steps=40000,45000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=16
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=32
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=64
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [maxpool]
 58 | size=2
 59 | stride=2
 60 | 
 61 | [convolutional]
 62 | batch_normalize=1
 63 | filters=128
 64 | size=3
 65 | stride=1
 66 | pad=1
 67 | activation=leaky
 68 | 
 69 | [maxpool]
 70 | size=2
 71 | stride=2
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=256
 76 | size=3
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [maxpool]
 82 | size=2
 83 | stride=2
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=512
 88 | size=3
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [maxpool]
 94 | size=2
 95 | stride=1
 96 | 
 97 | [convolutional]
 98 | batch_normalize=1
 99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 | 
105 | ###########
106 | 
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=30
128 | activation=linear
129 | 
130 | 
131 | 
132 | [yolo]
133 | mask = 3,4,5
134 | #anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
135 | anchors = 5,7,  11,13,  18,29,  40,41,  119,148,  289,253
136 | classes=5
137 | #classes=80
138 | num=6
139 | jitter=.3
140 | ignore_thresh = .7
141 | truth_thresh = 1
142 | random=1
143 | 
144 | [route]
145 | layers = -4
146 | 
147 | [convolutional]
148 | batch_normalize=1
149 | filters=128
150 | size=1
151 | stride=1
152 | pad=1
153 | activation=leaky
154 | 
155 | [upsample]
156 | stride=2
157 | 
158 | [route]
159 | layers = -1, 8
160 | 
161 | [convolutional]
162 | batch_normalize=1
163 | filters=256
164 | size=3
165 | stride=1
166 | pad=1
167 | activation=leaky
168 | 
169 | [convolutional]
170 | size=1
171 | stride=1
172 | pad=1
173 | filters=30
174 | activation=linear
175 | 
176 | [yolo]
177 | mask = 0,1,2
178 | #anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
179 | #anchors = 5,7,  7,5,  18,29,  29,18,37,58,58,37,81,82,  135,169,  344,319
180 | anchors = 5,7,  11,13,  18,29,  40,41,  119,148,  289,253
181 | classes=5
182 | num=6
183 | jitter=.3
184 | ignore_thresh = .7
185 | truth_thresh = 1
186 | random=1
187 | 


--------------------------------------------------------------------------------
/YOLO/yolov3-tiny_final.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eric612/Vehicle-Detection/caedb24b289b1c4774b85ecc15f60cf6b040bec6/YOLO/yolov3-tiny_final.weights


--------------------------------------------------------------------------------
/YOLO/yolov3.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training
  6 |  batch=32
  7 |  subdivisions=16
  8 | width=416
  9 | height=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 50020
 21 | policy=steps
 22 | steps=40000,45000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ######################
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 | 
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 | 
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 | 
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=30
604 | activation=linear
605 | 
606 | 
607 | [yolo]
608 | mask = 6,7,8
609 | # num=9 and mask 6,7,8 need nine anchor pairs; the earlier six-pair list (5,7, 11,13, 18,29, 40,41, 119,148, 289,253) gave this head none. NOTE(review): recompute nine dataset-specific anchors if desired.
610 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
611 | classes=5
612 | num=9
613 | jitter=.3
614 | ignore_thresh = .7
615 | truth_thresh = 1
616 | random=1
617 | 
618 | 
619 | [route]
620 | layers = -4
621 | 
622 | [convolutional]
623 | batch_normalize=1
624 | filters=256
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 | 
630 | [upsample]
631 | stride=2
632 | 
633 | [route]
634 | layers = -1, 61
635 | 
636 | 
637 | 
638 | [convolutional]
639 | batch_normalize=1
640 | filters=256
641 | size=1
642 | stride=1
643 | pad=1
644 | activation=leaky
645 | 
646 | [convolutional]
647 | batch_normalize=1
648 | size=3
649 | stride=1
650 | pad=1
651 | filters=512
652 | activation=leaky
653 | 
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 | 
662 | [convolutional]
663 | batch_normalize=1
664 | size=3
665 | stride=1
666 | pad=1
667 | filters=512
668 | activation=leaky
669 | 
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 | 
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 | 
686 | [convolutional]
687 | size=1
688 | stride=1
689 | pad=1
690 | filters=30
691 | activation=linear
692 | 
693 | 
694 | [yolo]
695 | mask = 3,4,5
696 | # num=9 declares nine anchor pairs, but the earlier list had six (5,7, 11,13, 18,29, 40,41, 119,148, 289,253); the stock nine-pair list is used so indices 0-8 are all defined. NOTE(review): recompute nine dataset-specific anchors if desired.
697 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
698 | classes=5
699 | num=9
700 | jitter=.3
701 | ignore_thresh = .7
702 | truth_thresh = 1
703 | random=1
704 | 
705 | 
706 | 
707 | [route]
708 | layers = -4
709 | 
710 | [convolutional]
711 | batch_normalize=1
712 | filters=128
713 | size=1
714 | stride=1
715 | pad=1
716 | activation=leaky
717 | 
718 | [upsample]
719 | stride=2
720 | 
721 | [route]
722 | layers = -1, 36
723 | 
724 | 
725 | 
726 | [convolutional]
727 | batch_normalize=1
728 | filters=128
729 | size=1
730 | stride=1
731 | pad=1
732 | activation=leaky
733 | 
734 | [convolutional]
735 | batch_normalize=1
736 | size=3
737 | stride=1
738 | pad=1
739 | filters=256
740 | activation=leaky
741 | 
742 | [convolutional]
743 | batch_normalize=1
744 | filters=128
745 | size=1
746 | stride=1
747 | pad=1
748 | activation=leaky
749 | 
750 | [convolutional]
751 | batch_normalize=1
752 | size=3
753 | stride=1
754 | pad=1
755 | filters=256
756 | activation=leaky
757 | 
758 | [convolutional]
759 | batch_normalize=1
760 | filters=128
761 | size=1
762 | stride=1
763 | pad=1
764 | activation=leaky
765 | 
766 | [convolutional]
767 | batch_normalize=1
768 | size=3
769 | stride=1
770 | pad=1
771 | filters=256
772 | activation=leaky
773 | 
774 | [convolutional]
775 | size=1
776 | stride=1
777 | pad=1
778 | filters=30
779 | activation=linear
780 | 
781 | 
782 | [yolo]
783 | mask = 0,1,2
784 | # num=9 declares nine anchor pairs, but the earlier list had six (5,7, 11,13, 18,29, 40,41, 119,148, 289,253); the stock nine-pair list is used so indices 0-8 are all defined. NOTE(review): recompute nine dataset-specific anchors if desired.
785 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
786 | classes=5
787 | num=9
788 | jitter=.3
789 | ignore_thresh = .7
790 | truth_thresh = 1
791 | random=1
792 | 
793 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "../../models/intel_optimized_models/faster-rcnn/pascal_voc/VGG19/faster_rcnn_end2end/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"  # lr is multiplied by gamma every stepsize iterations
 4 | gamma: 0.1
 5 | stepsize: 50000
 6 | momentum: 0.9
 7 | weight_decay: 0.0005
 8 | # iter_size: 1
 9 | iter_size: 2  # gradients are accumulated over 2 passes per weight update
10 | display: 20
11 | average_loss: 100
12 | # Standard Caffe solver snapshotting is disabled here; the project implements
13 | # its own snapshot function instead.
14 | snapshot: 0
15 | # The snapshot prefix is still used, though.
16 | snapshot_prefix: "vgg19_faster_rcnn"
17 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/test.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_19_layer"
  2 | 
  3 | input: "data"
  4 | input_shape {
  5 |   dim: 1
  6 |   dim: 3
  7 |   dim: 672
  8 |   dim: 672
  9 | }
 10 | 
 11 | input: "im_info"
 12 | input_shape {
 13 |   dim: 1
 14 |   dim: 3
 15 | }
 16 | 
 17 | layer {
 18 |   bottom: "data"
 19 |   top: "conv1_1"
 20 |   name: "conv1_1"
 21 |   type: "Convolution"
 22 |   param {
 23 |     lr_mult: 0
 24 |     decay_mult: 0
 25 |   }
 26 |   param {
 27 |     lr_mult: 0
 28 |     decay_mult: 0
 29 |   }
 30 |   convolution_param {
 31 |     num_output: 64
 32 |     pad: 1
 33 |     kernel_size: 3
 34 |   }
 35 | }
 36 | layer {
 37 |   bottom: "conv1_1"
 38 |   top: "conv1_1"
 39 |   name: "relu1_1"
 40 |   type: "ReLU"
 41 | }
 42 | layer {
 43 |   bottom: "conv1_1"
 44 |   top: "conv1_2"
 45 |   name: "conv1_2"
 46 |   type: "Convolution"
 47 |   param {
 48 |     lr_mult: 0
 49 |     decay_mult: 0
 50 |   }
 51 |   param {
 52 |     lr_mult: 0
 53 |     decay_mult: 0
 54 |   }
 55 |   convolution_param {
 56 |     num_output: 64
 57 |     pad: 1
 58 |     kernel_size: 3
 59 |   }
 60 | }
 61 | layer {
 62 |   bottom: "conv1_2"
 63 |   top: "conv1_2"
 64 |   name: "relu1_2"
 65 |   type: "ReLU"
 66 | }
 67 | layer {
 68 |   bottom: "conv1_2"
 69 |   top: "pool1"
 70 |   name: "pool1"
 71 |   type: "Pooling"
 72 |   pooling_param {
 73 |     pool: MAX
 74 |     kernel_size: 2
 75 |     stride: 2
 76 |   }
 77 | }
 78 | layer {
 79 |   bottom: "pool1"
 80 |   top: "conv2_1"
 81 |   name: "conv2_1"
 82 |   type: "Convolution"
 83 |   param {
 84 |     lr_mult: 0
 85 |     decay_mult: 0
 86 |   }
 87 |   param {
 88 |     lr_mult: 0
 89 |     decay_mult: 0
 90 |   }
 91 |   convolution_param {
 92 |     num_output: 128
 93 |     pad: 1
 94 |     kernel_size: 3
 95 |   }
 96 | }
 97 | layer {
 98 |   bottom: "conv2_1"
 99 |   top: "conv2_1"
100 |   name: "relu2_1"
101 |   type: "ReLU"
102 | }
103 | layer {
104 |   bottom: "conv2_1"
105 |   top: "conv2_2"
106 |   name: "conv2_2"
107 |   type: "Convolution"
108 |   param {
109 |     lr_mult: 0
110 |     decay_mult: 0
111 |   }
112 |   param {
113 |     lr_mult: 0
114 |     decay_mult: 0
115 |   }  
116 |   convolution_param {
117 |     num_output: 128
118 |     pad: 1
119 |     kernel_size: 3
120 |   }
121 | }
122 | layer {
123 |   bottom: "conv2_2"
124 |   top: "conv2_2"
125 |   name: "relu2_2"
126 |   type: "ReLU"
127 | }
128 | layer {
129 |   bottom: "conv2_2"
130 |   top: "pool2"
131 |   name: "pool2"
132 |   type: "Pooling"
133 |   pooling_param {
134 |     pool: MAX
135 |     kernel_size: 2
136 |     stride: 2
137 |   }
138 | }
139 | layer {
140 |   bottom: "pool2"
141 |   top: "conv3_1"
142 |   name: "conv3_1"
143 |   type: "Convolution"
144 |   param {
145 |     lr_mult: 1
146 |     decay_mult: 1
147 |   }
148 |   param {
149 |     lr_mult: 2
150 |     decay_mult: 0
151 |   }
152 |   convolution_param {
153 |     num_output: 256
154 |     pad: 1
155 |     kernel_size: 3
156 |   }
157 | }
158 | layer {
159 |   bottom: "conv3_1"
160 |   top: "conv3_1"
161 |   name: "relu3_1"
162 |   type: "ReLU"
163 | }
164 | layer {
165 |   bottom: "conv3_1"
166 |   top: "conv3_2"
167 |   name: "conv3_2"
168 |   type: "Convolution"
169 |   param {
170 |     lr_mult: 1
171 |     decay_mult: 1
172 |   }
173 |   param {
174 |     lr_mult: 2
175 |     decay_mult: 0
176 |   }
177 |   convolution_param {
178 |     num_output: 256
179 |     pad: 1
180 |     kernel_size: 3
181 |   }
182 | }
183 | layer {
184 |   bottom: "conv3_2"
185 |   top: "conv3_2"
186 |   name: "relu3_2"
187 |   type: "ReLU"
188 | }
189 | layer {
190 |   bottom: "conv3_2"
191 |   top: "conv3_3"
192 |   name: "conv3_3"
193 |   type: "Convolution"
194 |   param {
195 |     lr_mult: 1
196 |     decay_mult: 1
197 |   }
198 |   param {
199 |     lr_mult: 2
200 |     decay_mult: 0
201 |   }
202 |   convolution_param {
203 |     num_output: 256
204 |     pad: 1
205 |     kernel_size: 3
206 |   }
207 | }
208 | layer {
209 |   bottom: "conv3_3"
210 |   top: "conv3_3"
211 |   name: "relu3_3"
212 |   type: "ReLU"
213 | }
214 | layer {
215 |   bottom: "conv3_3"
216 |   top: "conv3_4"
217 |   name: "conv3_4"
218 |   type: "Convolution"
219 |   param {
220 |     lr_mult: 1
221 |     decay_mult: 1
222 |   }
223 |   param {
224 |     lr_mult: 2
225 |     decay_mult: 0
226 |   }
227 |   convolution_param {
228 |     num_output: 256
229 |     pad: 1
230 |     kernel_size: 3
231 |   }
232 | }
233 | layer {
234 |   bottom: "conv3_4"
235 |   top: "conv3_4"
236 |   name: "relu3_4"
237 |   type: "ReLU"
238 | }
239 | layer {
240 |   bottom: "conv3_4"
241 |   top: "pool3"
242 |   name: "pool3"
243 |   type: "Pooling"
244 |   pooling_param {
245 |     pool: MAX
246 |     kernel_size: 2
247 |     stride: 2
248 |   }
249 | }
250 | layer {
251 |   bottom: "pool3"
252 |   top: "conv4_1"
253 |   name: "conv4_1"
254 |   type: "Convolution"
255 |   param {
256 |     lr_mult: 1
257 |     decay_mult: 1
258 |   }
259 |   param {
260 |     lr_mult: 2
261 |     decay_mult: 0
262 |   }
263 |   convolution_param {
264 |     num_output: 512
265 |     pad: 1
266 |     kernel_size: 3
267 |   }
268 | }
269 | layer {
270 |   bottom: "conv4_1"
271 |   top: "conv4_1"
272 |   name: "relu4_1"
273 |   type: "ReLU"
274 | }
275 | layer {
276 |   bottom: "conv4_1"
277 |   top: "conv4_2"
278 |   name: "conv4_2"
279 |   type: "Convolution"
280 |   param {
281 |     lr_mult: 1
282 |     decay_mult: 1
283 |   }
284 |   param {
285 |     lr_mult: 2
286 |     decay_mult: 0
287 |   }
288 |   convolution_param {
289 |     num_output: 512
290 |     pad: 1
291 |     kernel_size: 3
292 |   }
293 | }
294 | layer {
295 |   bottom: "conv4_2"
296 |   top: "conv4_2"
297 |   name: "relu4_2"
298 |   type: "ReLU"
299 | }
300 | layer {
301 |   bottom: "conv4_2"
302 |   top: "conv4_3"
303 |   name: "conv4_3"
304 |   type: "Convolution"
305 |   param {
306 |     lr_mult: 1
307 |     decay_mult: 1
308 |   }
309 |   param {
310 |     lr_mult: 2
311 |     decay_mult: 0
312 |   }
313 |   convolution_param {
314 |     num_output: 512
315 |     pad: 1
316 |     kernel_size: 3
317 |   }
318 | }
319 | layer {
320 |   bottom: "conv4_3"
321 |   top: "conv4_3"
322 |   name: "relu4_3"
323 |   type: "ReLU"
324 | }
325 | layer {
326 |   bottom: "conv4_3"
327 |   top: "conv4_4"
328 |   name: "conv4_4"
329 |   type: "Convolution"
330 |   param {
331 |     lr_mult: 1
332 |     decay_mult: 1
333 |   }
334 |   param {
335 |     lr_mult: 2
336 |     decay_mult: 0
337 |   }
338 |   convolution_param {
339 |     num_output: 512
340 |     pad: 1
341 |     kernel_size: 3
342 |   }
343 | }
344 | layer {
345 |   bottom: "conv4_4"
346 |   top: "conv4_4"
347 |   name: "relu4_4"
348 |   type: "ReLU"
349 | }
350 | layer {
351 |   bottom: "conv4_4"
352 |   top: "pool4"
353 |   name: "pool4"
354 |   type: "Pooling"
355 |   pooling_param {
356 |     pool: MAX
357 |     kernel_size: 2
358 |     stride: 2
359 |   }
360 | }
361 | layer {
362 |   bottom: "pool4"
363 |   top: "conv5_1"
364 |   name: "conv5_1"
365 |   type: "Convolution"
366 |   param {
367 |     lr_mult: 1
368 |     decay_mult: 1
369 |   }
370 |   param {
371 |     lr_mult: 2
372 |     decay_mult: 0
373 |   }
374 |   convolution_param {
375 |     num_output: 512
376 |     pad: 1
377 |     kernel_size: 3
378 |   }
379 | }
380 | layer {
381 |   bottom: "conv5_1"
382 |   top: "conv5_1"
383 |   name: "relu5_1"
384 |   type: "ReLU"
385 | }
386 | layer {
387 |   bottom: "conv5_1"
388 |   top: "conv5_2"
389 |   name: "conv5_2"
390 |   type: "Convolution"
391 |   param {
392 |     lr_mult: 1
393 |     decay_mult: 1
394 |   }
395 |   param {
396 |     lr_mult: 2
397 |     decay_mult: 0
398 |   }
399 |   convolution_param {
400 |     num_output: 512
401 |     pad: 1
402 |     kernel_size: 3
403 |   }
404 | }
405 | layer {
406 |   bottom: "conv5_2"
407 |   top: "conv5_2"
408 |   name: "relu5_2"
409 |   type: "ReLU"
410 | }
411 | layer {
412 |   bottom: "conv5_2"
413 |   top: "conv5_3"
414 |   name: "conv5_3"
415 |   type: "Convolution"
416 |   param {
417 |     lr_mult: 1
418 |     decay_mult: 1
419 |   }
420 |   param {
421 |     lr_mult: 2
422 |     decay_mult: 0
423 |   }
424 |   convolution_param {
425 |     num_output: 512
426 |     pad: 1
427 |     kernel_size: 3
428 |   }
429 | }
430 | layer {
431 |   bottom: "conv5_3"
432 |   top: "conv5_3"
433 |   name: "relu5_3"
434 |   type: "ReLU"
435 | }
436 | layer {
437 |   bottom: "conv5_3"
438 |   top: "conv5_4"
439 |   name: "conv5_4"
440 |   type: "Convolution"
441 |   param {
442 |     lr_mult: 1
443 |     decay_mult: 1
444 |   }
445 |   param {
446 |     lr_mult: 2
447 |     decay_mult: 0
448 |   }
449 |   convolution_param {
450 |     num_output: 512
451 |     pad: 1
452 |     kernel_size: 3
453 |   }
454 | }
455 | layer {
456 |   bottom: "conv5_4"
457 |   top: "conv5_4"
458 |   name: "relu5_4"
459 |   type: "ReLU"
460 | }
461 | #========= RPN ============
462 | 
463 | layer {
464 |   name: "rpn_conv/3x3"
465 |   type: "Convolution"
466 |   bottom: "conv5_4"
467 |   top: "rpn/output"
468 |   param { lr_mult: 1.0 decay_mult: 1.0 }
469 |   param { lr_mult: 2.0 decay_mult: 0 }
470 |   convolution_param {
471 |     num_output: 512
472 |     kernel_size: 3 pad: 1 stride: 1
473 |     weight_filler { type: "gaussian" std: 0.01 }
474 |     bias_filler { type: "constant" value: 0 }
475 |   }
476 | }
477 | layer {
478 |   name: "rpn_relu/3x3"
479 |   type: "ReLU"
480 |   bottom: "rpn/output"
481 |   top: "rpn/output"
482 | }
483 | 
484 | layer {
485 |   name: "rpn_cls_score"
486 |   type: "Convolution"
487 |   bottom: "rpn/output"
488 |   top: "rpn_cls_score"
489 |   param { lr_mult: 1.0 decay_mult: 1.0 }
490 |   param { lr_mult: 2.0 decay_mult: 0 }
491 |   convolution_param {
492 |     num_output: 18   # 2(bg/fg) * 9(anchors)
493 |     kernel_size: 1 pad: 0 stride: 1
494 |     weight_filler { type: "gaussian" std: 0.01 }
495 |     bias_filler { type: "constant" value: 0 }
496 |   }
497 | }
498 | layer {
499 |   name: "rpn_bbox_pred"
500 |   type: "Convolution"
501 |   bottom: "rpn/output"
502 |   top: "rpn_bbox_pred"
503 |   param { lr_mult: 1.0 decay_mult: 1.0 }
504 |   param { lr_mult: 2.0 decay_mult: 0 }
505 |   convolution_param {
506 |     num_output: 36   # 4 * 9(anchors)
507 |     kernel_size: 1 pad: 0 stride: 1
508 |     weight_filler { type: "gaussian" std: 0.01 }
509 |     bias_filler { type: "constant" value: 0 }
510 |   }
511 | }
512 | layer {  # Reshapes rpn_cls_score so that axis 1 has size 2 before the Softmax layer that consumes it.
513 |    bottom: "rpn_cls_score"
514 |    top: "rpn_cls_score_reshape"
515 |    name: "rpn_cls_score_reshape"
516 |    type: "Reshape"
517 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # 0 = copy this dim from the bottom blob; -1 = inferred from the remaining element count
518 | }
519 | 
520 | #========= RoI Proposal ============
521 | 
522 | layer {
523 |   name: "rpn_cls_prob"
524 |   type: "Softmax"
525 |   bottom: "rpn_cls_score_reshape"
526 |   top: "rpn_cls_prob"
527 | }
528 | layer {  # Reshapes the softmax output back to 18 channels for the 'proposal' layer.
529 |   name: 'rpn_cls_prob_reshape'
530 |   type: 'Reshape'
531 |   bottom: 'rpn_cls_prob'
532 |   top: 'rpn_cls_prob_reshape'
533 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # 18 is the same value as rpn_cls_score's num_output; 0 = copy dim from bottom, -1 = inferred
534 | }
535 | layer {
536 |   name: 'proposal'
537 |   type: 'Python'
538 |   bottom: 'rpn_cls_prob_reshape'
539 |   bottom: 'rpn_bbox_pred'
540 |   bottom: 'im_info'
541 |   top: 'rois'
542 |   python_param {
543 |     module: 'rpn.proposal_layer'
544 |     layer: 'ProposalLayer'
545 |     param_str: "'feat_stride': 16"
546 |   }
547 | }
548 | 
549 | #========= RCNN ============
550 | 
551 | layer {
552 |   name: "roi_pool5"
553 |   type: "ROIPooling"
554 |   bottom: "conv5_4"
555 |   bottom: "rois"
556 |   top: "pool5"
557 |   roi_pooling_param {
558 |     pooled_w: 7
559 |     pooled_h: 7
560 |     spatial_scale: 0.0625 # 1/16
561 |   }
562 | }
563 | layer {
564 |   name: "fc6"
565 |   type: "InnerProduct"
566 |   bottom: "pool5"
567 |   top: "fc6"
568 |   param {
569 |     lr_mult: 1
570 |     decay_mult: 1
571 |   }
572 |   param {
573 |     lr_mult: 2
574 |     decay_mult: 0
575 |   }
576 |   inner_product_param {
577 |     num_output: 4096
578 |   }
579 | }
580 | layer {
581 |   name: "relu6"
582 |   type: "ReLU"
583 |   bottom: "fc6"
584 |   top: "fc6"
585 | }
586 | layer {
587 |   name: "drop6"
588 |   type: "Dropout"
589 |   bottom: "fc6"
590 |   top: "fc6"
591 |   dropout_param {
592 |     dropout_ratio: 0.5
593 |   }
594 | }
595 | layer {
596 |   name: "fc7"
597 |   type: "InnerProduct"
598 |   bottom: "fc6"
599 |   top: "fc7"
600 |   param {
601 |     lr_mult: 1
602 |     decay_mult: 1
603 |   }
604 |   param {
605 |     lr_mult: 2
606 |     decay_mult: 0
607 |   }
608 |   inner_product_param {
609 |     num_output: 4096
610 |   }
611 | }
612 | layer {
613 |   name: "relu7"
614 |   type: "ReLU"
615 |   bottom: "fc7"
616 |   top: "fc7"
617 | }
618 | layer {
619 |   name: "drop7"
620 |   type: "Dropout"
621 |   bottom: "fc7"
622 |   top: "fc7"
623 |   dropout_param {
624 |     dropout_ratio: 0.5
625 |   }
626 | }
627 | layer {
628 |   name: "cls_score2"
629 |   type: "InnerProduct"
630 |   bottom: "fc7"
631 |   top: "cls_score2"
632 |   param {
633 |     lr_mult: 1
634 |     decay_mult: 1
635 |   }
636 |   param {
637 |     lr_mult: 2
638 |     decay_mult: 0
639 |   }
640 |   inner_product_param {
641 |     num_output: 6
642 |     weight_filler {
643 |       type: "gaussian"
644 |       std: 0.01
645 |     }
646 |     bias_filler {
647 |       type: "constant"
648 |       value: 0
649 |     }
650 |   }
651 | }
652 | layer {  # Box-regression head on top of fc7.
653 |   name: "bbox_pred2"  # same layer name as in train.prototxt: Caffe copies trained weights by layer name, so a different name would leave this layer at its filler initialisation
654 |   type: "InnerProduct"
655 |   bottom: "fc7"
656 |   top: "bbox_pred"  # output blob name deliberately left unchanged for code that reads this net's outputs
657 |   param {
658 |     lr_mult: 1
659 |     decay_mult: 1
660 |   }
661 |   param {
662 |     lr_mult: 2
663 |     decay_mult: 0
664 |   }
665 |   inner_product_param {
666 |     num_output: 24  # 4 * 6, where 6 is cls_score2's num_output (presumably 4 box-regression values per class)
667 |     weight_filler {
668 |       type: "gaussian"
669 |       std: 0.001
670 |     }
671 |     bias_filler {
672 |       type: "constant"
673 |       value: 0
674 |     }
675 |   }
676 | }
677 | layer {
678 |   name: "cls_prob"
679 |   type: "Softmax"
680 |   bottom: "cls_score2"
681 |   top: "cls_prob"
682 | }
683 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/train.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_19_layers"
  2 | layer {  # Python data layer producing the image blob, image info and ground-truth boxes for training.
  3 |   name: 'input-data'
  4 |   type: 'Python'
  5 |   top: 'data'
  6 |   top: 'im_info'
  7 |   top: 'gt_boxes'
  8 |   python_param {
  9 |     module: 'roi_data_layer.layer'
 10 |     layer: 'RoIDataLayer'
 11 |     param_str: "'num_classes': 6"  # same value as the 'roi-data' layer's num_classes and cls_score2's num_output in this file
 12 |   }
 13 | }
 14 | layer {
 15 |   bottom: "data"
 16 |   top: "conv1_1"
 17 |   name: "conv1_1"
 18 |   type: "Convolution"
 19 |   param {
 20 |     lr_mult: 0
 21 |     decay_mult: 0
 22 |   }
 23 |   param {
 24 |     lr_mult: 0
 25 |     decay_mult: 0
 26 |   }
 27 |   convolution_param {
 28 |     num_output: 64
 29 |     pad: 1
 30 |     kernel_size: 3
 31 |   }
 32 | }
 33 | layer {
 34 |   bottom: "conv1_1"
 35 |   top: "conv1_1"
 36 |   name: "relu1_1"
 37 |   type: "ReLU"
 38 | }
 39 | layer {
 40 |   bottom: "conv1_1"
 41 |   top: "conv1_2"
 42 |   name: "conv1_2"
 43 |   type: "Convolution"
 44 |   param {
 45 |     lr_mult: 0
 46 |     decay_mult: 0
 47 |   }
 48 |   param {
 49 |     lr_mult: 0
 50 |     decay_mult: 0
 51 |   }
 52 |   convolution_param {
 53 |     num_output: 64
 54 |     pad: 1
 55 |     kernel_size: 3
 56 |   }
 57 | }
 58 | layer {
 59 |   bottom: "conv1_2"
 60 |   top: "conv1_2"
 61 |   name: "relu1_2"
 62 |   type: "ReLU"
 63 | }
 64 | layer {
 65 |   bottom: "conv1_2"
 66 |   top: "pool1"
 67 |   name: "pool1"
 68 |   type: "Pooling"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 2
 72 |     stride: 2
 73 |   }
 74 | }
 75 | layer {
 76 |   bottom: "pool1"
 77 |   top: "conv2_1"
 78 |   name: "conv2_1"
 79 |   type: "Convolution"
 80 |   param {
 81 |     lr_mult: 0
 82 |     decay_mult: 0
 83 |   }
 84 |   param {
 85 |     lr_mult: 0
 86 |     decay_mult: 0
 87 |   }
 88 |   convolution_param {
 89 |     num_output: 128
 90 |     pad: 1
 91 |     kernel_size: 3
 92 |   }
 93 | }
 94 | layer {
 95 |   bottom: "conv2_1"
 96 |   top: "conv2_1"
 97 |   name: "relu2_1"
 98 |   type: "ReLU"
 99 | }
100 | layer {
101 |   bottom: "conv2_1"
102 |   top: "conv2_2"
103 |   name: "conv2_2"
104 |   type: "Convolution"
105 |   param {
106 |     lr_mult: 0
107 |     decay_mult: 0
108 |   }
109 |   param {
110 |     lr_mult: 0
111 |     decay_mult: 0
112 |   }  
113 |   convolution_param {
114 |     num_output: 128
115 |     pad: 1
116 |     kernel_size: 3
117 |   }
118 | }
119 | layer {
120 |   bottom: "conv2_2"
121 |   top: "conv2_2"
122 |   name: "relu2_2"
123 |   type: "ReLU"
124 | }
125 | layer {
126 |   bottom: "conv2_2"
127 |   top: "pool2"
128 |   name: "pool2"
129 |   type: "Pooling"
130 |   pooling_param {
131 |     pool: MAX
132 |     kernel_size: 2
133 |     stride: 2
134 |   }
135 | }
136 | layer {
137 |   bottom: "pool2"
138 |   top: "conv3_1"
139 |   name: "conv3_1"
140 |   type: "Convolution"
141 |   param {
142 |     lr_mult: 1
143 |   }
144 |   param {
145 |     lr_mult: 2
146 |   }
147 |   convolution_param {
148 |     num_output: 256
149 |     pad: 1
150 |     kernel_size: 3
151 |   }
152 | }
153 | layer {
154 |   bottom: "conv3_1"
155 |   top: "conv3_1"
156 |   name: "relu3_1"
157 |   type: "ReLU"
158 | }
159 | layer {
160 |   bottom: "conv3_1"
161 |   top: "conv3_2"
162 |   name: "conv3_2"
163 |   type: "Convolution"
164 |   param {
165 |     lr_mult: 1
166 |   }
167 |   param {
168 |     lr_mult: 2
169 |   }
170 |   convolution_param {
171 |     num_output: 256
172 |     pad: 1
173 |     kernel_size: 3
174 |   }
175 | }
176 | layer {
177 |   bottom: "conv3_2"
178 |   top: "conv3_2"
179 |   name: "relu3_2"
180 |   type: "ReLU"
181 | }
182 | layer {
183 |   bottom: "conv3_2"
184 |   top: "conv3_3"
185 |   name: "conv3_3"
186 |   type: "Convolution"
187 |   param {
188 |     lr_mult: 1
189 |   }
190 |   param {
191 |     lr_mult: 2
192 |   }  
193 |   convolution_param {
194 |     num_output: 256
195 |     pad: 1
196 |     kernel_size: 3
197 |   }
198 | }
199 | layer {
200 |   bottom: "conv3_3"
201 |   top: "conv3_3"
202 |   name: "relu3_3"
203 |   type: "ReLU"
204 | }
205 | layer {
206 |   bottom: "conv3_3"
207 |   top: "conv3_4"
208 |   name: "conv3_4"
209 |   type: "Convolution"
210 |   param {
211 |     lr_mult: 1
212 |   }
213 |   param {
214 |     lr_mult: 2
215 |   }  
216 |   convolution_param {
217 |     num_output: 256
218 |     pad: 1
219 |     kernel_size: 3
220 |   }
221 | }
222 | layer {
223 |   bottom: "conv3_4"
224 |   top: "conv3_4"
225 |   name: "relu3_4"
226 |   type: "ReLU"
227 | }
228 | layer {
229 |   bottom: "conv3_4"
230 |   top: "pool3"
231 |   name: "pool3"
232 |   type: "Pooling"
233 |   pooling_param {
234 |     pool: MAX
235 |     kernel_size: 2
236 |     stride: 2
237 |   }
238 | }
239 | layer {
240 |   bottom: "pool3"
241 |   top: "conv4_1"
242 |   name: "conv4_1"
243 |   type: "Convolution"
244 |   param {
245 |     lr_mult: 1
246 |   }
247 |   param {
248 |     lr_mult: 2
249 |   }  
250 |   convolution_param {
251 |     num_output: 512
252 |     pad: 1
253 |     kernel_size: 3
254 |   }
255 | }
256 | layer {
257 |   bottom: "conv4_1"
258 |   top: "conv4_1"
259 |   name: "relu4_1"
260 |   type: "ReLU"
261 | }
262 | layer {
263 |   bottom: "conv4_1"
264 |   top: "conv4_2"
265 |   name: "conv4_2"
266 |   type: "Convolution"
267 |   param {
268 |     lr_mult: 1
269 |   }
270 |   param {
271 |     lr_mult: 2
272 |   }  
273 |   convolution_param {
274 |     num_output: 512
275 |     pad: 1
276 |     kernel_size: 3
277 |   }
278 | }
279 | layer {
280 |   bottom: "conv4_2"
281 |   top: "conv4_2"
282 |   name: "relu4_2"
283 |   type: "ReLU"
284 | }
285 | layer {
286 |   bottom: "conv4_2"
287 |   top: "conv4_3"
288 |   name: "conv4_3"
289 |   type: "Convolution"
290 |   param {
291 |     lr_mult: 1
292 |   }
293 |   param {
294 |     lr_mult: 2
295 |   }  
296 |   convolution_param {
297 |     num_output: 512
298 |     pad: 1
299 |     kernel_size: 3
300 |   }
301 | }
302 | layer {
303 |   bottom: "conv4_3"
304 |   top: "conv4_3"
305 |   name: "relu4_3"
306 |   type: "ReLU"
307 | }
308 | layer {
309 |   bottom: "conv4_3"
310 |   top: "conv4_4"
311 |   name: "conv4_4"
312 |   type: "Convolution"
313 |   param {
314 |     lr_mult: 1
315 |   }
316 |   param {
317 |     lr_mult: 2
318 |   }  
319 |   convolution_param {
320 |     num_output: 512
321 |     pad: 1
322 |     kernel_size: 3
323 |   }
324 | }
325 | layer {
326 |   bottom: "conv4_4"
327 |   top: "conv4_4"
328 |   name: "relu4_4"
329 |   type: "ReLU"
330 | }
331 | layer {
332 |   bottom: "conv4_4"
333 |   top: "pool4"
334 |   name: "pool4"
335 |   type: "Pooling"
336 |   pooling_param {
337 |     pool: MAX
338 |     kernel_size: 2
339 |     stride: 2
340 |   }
341 | }
342 | layer {
343 |   bottom: "pool4"
344 |   top: "conv5_1"
345 |   name: "conv5_1"
346 |   type: "Convolution"
347 |   param {
348 |     lr_mult: 1
349 |   }
350 |   param {
351 |     lr_mult: 2
352 |   }  
353 |   convolution_param {
354 |     num_output: 512
355 |     pad: 1
356 |     kernel_size: 3
357 |   }
358 | }
359 | layer {
360 |   bottom: "conv5_1"
361 |   top: "conv5_1"
362 |   name: "relu5_1"
363 |   type: "ReLU"
364 | }
365 | layer {
366 |   bottom: "conv5_1"
367 |   top: "conv5_2"
368 |   name: "conv5_2"
369 |   type: "Convolution"
370 |   param {
371 |     lr_mult: 1
372 |   }
373 |   param {
374 |     lr_mult: 2
375 |   }  
376 |   convolution_param {
377 |     num_output: 512
378 |     pad: 1
379 |     kernel_size: 3
380 |   }
381 | }
382 | layer {
383 |   bottom: "conv5_2"
384 |   top: "conv5_2"
385 |   name: "relu5_2"
386 |   type: "ReLU"
387 | }
388 | layer {
389 |   bottom: "conv5_2"
390 |   top: "conv5_3"
391 |   name: "conv5_3"
392 |   type: "Convolution"
393 |   param {
394 |     lr_mult: 1
395 |   }
396 |   param {
397 |     lr_mult: 2
398 |   }  
399 |   convolution_param {
400 |     num_output: 512
401 |     pad: 1
402 |     kernel_size: 3
403 |   }
404 | }
405 | layer {
406 |   bottom: "conv5_3"
407 |   top: "conv5_3"
408 |   name: "relu5_3"
409 |   type: "ReLU"
410 | }
411 | layer {
412 |   bottom: "conv5_3"
413 |   top: "conv5_4"
414 |   name: "conv5_4"
415 |   type: "Convolution"
416 |   param {
417 |     lr_mult: 1
418 |   }
419 |   param {
420 |     lr_mult: 2
421 |   }  
422 |   convolution_param {
423 |     num_output: 512
424 |     pad: 1
425 |     kernel_size: 3
426 |   }
427 | }
428 | layer {
429 |   bottom: "conv5_4"
430 |   top: "conv5_4"
431 |   name: "relu5_4"
432 |   type: "ReLU"
433 | }
434 | #========= RPN ============
435 | 
436 | layer {
437 |   name: "rpn_conv/3x3"
438 |   type: "Convolution"
439 |   bottom: "conv5_4"
440 |   top: "rpn/output"
441 |   param { lr_mult: 1.0 }
442 |   param { lr_mult: 2.0 }
443 |   convolution_param {
444 |     num_output: 512
445 |     kernel_size: 3 pad: 1 stride: 1
446 |     weight_filler { type: "gaussian" std: 0.01 }
447 |     bias_filler { type: "constant" value: 0 }
448 |   }
449 | }
450 | layer {
451 |   name: "rpn_relu/3x3"
452 |   type: "ReLU"
453 |   bottom: "rpn/output"
454 |   top: "rpn/output"
455 | }
456 | 
457 | layer {
458 |   name: "rpn_cls_score"
459 |   type: "Convolution"
460 |   bottom: "rpn/output"
461 |   top: "rpn_cls_score"
462 |   param { lr_mult: 1.0 }
463 |   param { lr_mult: 2.0 }
464 |   convolution_param {
465 |     num_output: 18   # 2(bg/fg) * 9(anchors)
466 |     kernel_size: 1 pad: 0 stride: 1
467 |     weight_filler { type: "gaussian" std: 0.01 }
468 |     bias_filler { type: "constant" value: 0 }
469 |   }
470 | }
471 | 
472 | layer {
473 |   name: "rpn_bbox_pred"
474 |   type: "Convolution"
475 |   bottom: "rpn/output"
476 |   top: "rpn_bbox_pred"
477 |   param { lr_mult: 1.0 }
478 |   param { lr_mult: 2.0 }
479 |   convolution_param {
480 |     num_output: 36   # 4 * 9(anchors)
481 |     kernel_size: 1 pad: 0 stride: 1
482 |     weight_filler { type: "gaussian" std: 0.01 }
483 |     bias_filler { type: "constant" value: 0 }
484 |   }
485 | }
486 | 
487 | layer {  # Reshapes rpn_cls_score so that axis 1 has size 2 before the softmax layers (rpn_loss_cls, rpn_cls_prob) that consume it.
488 |    bottom: "rpn_cls_score"
489 |    top: "rpn_cls_score_reshape"
490 |    name: "rpn_cls_score_reshape"
491 |    type: "Reshape"
492 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # 0 = copy this dim from the bottom blob; -1 = inferred from the remaining element count
493 | }
494 | 
495 | layer {
496 |   name: 'rpn-data'
497 |   type: 'Python'
498 |   bottom: 'rpn_cls_score'
499 |   bottom: 'gt_boxes'
500 |   bottom: 'im_info'
501 |   bottom: 'data'
502 |   top: 'rpn_labels'
503 |   top: 'rpn_bbox_targets'
504 |   top: 'rpn_bbox_inside_weights'
505 |   top: 'rpn_bbox_outside_weights'
506 |   python_param {
507 |     module: 'rpn.anchor_target_layer'
508 |     layer: 'AnchorTargetLayer'
509 |     param_str: "'feat_stride': 16"
510 |   }
511 | }
512 | 
513 | layer {  # Softmax classification loss for the RPN, computed from the reshaped scores and the labels emitted by 'rpn-data'.
514 |   name: "rpn_loss_cls"
515 |   type: "SoftmaxWithLoss"
516 |   bottom: "rpn_cls_score_reshape"
517 |   bottom: "rpn_labels"
518 |   propagate_down: 1  # back-propagate into rpn_cls_score_reshape
519 |   propagate_down: 0  # no gradient into rpn_labels
520 |   top: "rpn_cls_loss"  # note: the blob is "rpn_cls_loss" while the layer is named "rpn_loss_cls"
521 |   loss_weight: 1
522 |   loss_param {
523 |     ignore_label: -1  # entries labelled -1 are excluded from the loss
524 |     normalize: true
525 |   }
526 | }
527 | 
528 | layer {
529 |   name: "rpn_loss_bbox"
530 |   type: "SmoothL1Loss"
531 |   bottom: "rpn_bbox_pred"
532 |   bottom: "rpn_bbox_targets"
533 |   bottom: 'rpn_bbox_inside_weights'
534 |   bottom: 'rpn_bbox_outside_weights'
535 |   top: "rpn_loss_bbox"
536 |   loss_weight: 1
537 |   smooth_l1_loss_param { sigma: 3.0 }
538 | }
539 | 
540 | #========= RoI Proposal ============
541 | 
542 | layer {
543 |   name: "rpn_cls_prob"
544 |   type: "Softmax"
545 |   bottom: "rpn_cls_score_reshape"
546 |   top: "rpn_cls_prob"
547 | }
548 | 
549 | layer {  # Reshapes the softmax output back to 18 channels for the 'proposal' layer.
550 |   name: 'rpn_cls_prob_reshape'
551 |   type: 'Reshape'
552 |   bottom: 'rpn_cls_prob'
553 |   top: 'rpn_cls_prob_reshape'
554 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # 18 is the same value as rpn_cls_score's num_output; 0 = copy dim from bottom, -1 = inferred
555 | }
556 | 
557 | layer {  # Python layer that takes the RPN class probabilities, box predictions and im_info and emits 'rpn_rois'.
558 |   name: 'proposal'
559 |   type: 'Python'
560 |   bottom: 'rpn_cls_prob_reshape'
561 |   bottom: 'rpn_bbox_pred'
562 |   bottom: 'im_info'
563 |   top: 'rpn_rois'  # consumed by the 'roi-data' layer
564 | #  top: 'rpn_scores'
565 |   python_param {
566 |     module: 'rpn.proposal_layer'
567 |     layer: 'ProposalLayer'
568 |     param_str: "'feat_stride': 16"  # 16 = 2^4: conv5_4 sits behind four stride-2 pooling layers (pool1..pool4); same factor as roi_pool5's spatial_scale of 1/16
569 |   }
570 | }
571 | 
572 | #layer {
573 | #  name: 'debug-data'
574 | #  type: 'Python'
575 | #  bottom: 'data'
576 | #  bottom: 'rpn_rois'
577 | #  bottom: 'rpn_scores'
578 | #  python_param {
579 | #    module: 'rpn.debug_layer'
580 | #    layer: 'RPNDebugLayer'
581 | #  }
582 | #}
583 | 
584 | layer {  # Python layer that turns 'rpn_rois' and 'gt_boxes' into the RoIs, labels and box-regression targets/weights used by the RCNN head.
585 |   name: 'roi-data'
586 |   type: 'Python'
587 |   bottom: 'rpn_rois'
588 |   bottom: 'gt_boxes'
589 |   top: 'rois'  # fed to roi_pool5
590 |   top: 'labels'  # fed to loss_cls
591 |   top: 'bbox_targets'  # fed to loss_bbox
592 |   top: 'bbox_inside_weights'  # fed to loss_bbox
593 |   top: 'bbox_outside_weights'  # fed to loss_bbox
594 |   python_param {
595 |     module: 'rpn.proposal_target_layer'
596 |     layer: 'ProposalTargetLayer'
597 |     param_str: "'num_classes': 6"  # same value as the 'input-data' layer's num_classes and cls_score2's num_output in this file
598 |   }
599 | }
600 | 
601 | #========= RCNN ============
602 | 
603 | layer {
604 |   name: "roi_pool5"
605 |   type: "ROIPooling"
606 |   bottom: "conv5_4"
607 |   bottom: "rois"
608 |   top: "pool5"
609 |   roi_pooling_param {
610 |     pooled_w: 7
611 |     pooled_h: 7
612 |     spatial_scale: 0.0625 # 1/16
613 |   }
614 | }
615 | layer {
616 |   name: "fc6"
617 |   type: "InnerProduct"
618 |   bottom: "pool5"
619 |   top: "fc6"
620 |   param {
621 |     lr_mult: 1
622 |   }
623 |   param {
624 |     lr_mult: 2
625 |   }
626 |   inner_product_param {
627 |     num_output: 4096
628 |   }
629 | }
630 | layer {
631 |   name: "relu6"
632 |   type: "ReLU"
633 |   bottom: "fc6"
634 |   top: "fc6"
635 | }
636 | layer {
637 |   name: "drop6"
638 |   type: "Dropout"
639 |   bottom: "fc6"
640 |   top: "fc6"
641 |   dropout_param {
642 |     dropout_ratio: 0.5
643 |   }
644 | }
645 | layer {
646 |   name: "fc7"
647 |   type: "InnerProduct"
648 |   bottom: "fc6"
649 |   top: "fc7"
650 |   param {
651 |     lr_mult: 1
652 |   }
653 |   param {
654 |     lr_mult: 2
655 |   }
656 |   inner_product_param {
657 |     num_output: 4096
658 |   }
659 | }
660 | layer {
661 |   name: "relu7"
662 |   type: "ReLU"
663 |   bottom: "fc7"
664 |   top: "fc7"
665 | }
666 | layer {
667 |   name: "drop7"
668 |   type: "Dropout"
669 |   bottom: "fc7"
670 |   top: "fc7"
671 |   dropout_param {
672 |     dropout_ratio: 0.5
673 |   }
674 | }
675 | layer {
676 |   name: "cls_score2"
677 |   type: "InnerProduct"
678 |   bottom: "fc7"
679 |   top: "cls_score2"
680 |   param {
681 |     lr_mult: 1
682 |   }
683 |   param {
684 |     lr_mult: 2
685 |   }
686 |   inner_product_param {
687 |     num_output: 6
688 |     weight_filler {
689 |       type: "gaussian"
690 |       std: 0.01
691 |     }
692 |     bias_filler {
693 |       type: "constant"
694 |       value: 0
695 |     }
696 |   }
697 | }
698 | layer {
699 |   name: "bbox_pred2"
700 |   type: "InnerProduct"
701 |   bottom: "fc7"
702 |   top: "bbox_pred2"
703 |   param {
704 |     lr_mult: 1
705 |   }
706 |   param {
707 |     lr_mult: 2
708 |   }
709 |   inner_product_param {
710 |     num_output: 24
711 |     weight_filler {
712 |       type: "gaussian"
713 |       std: 0.001
714 |     }
715 |     bias_filler {
716 |       type: "constant"
717 |       value: 0
718 |     }
719 |   }
720 | }
721 | layer {
722 |   name: "loss_cls"
723 |   type: "SoftmaxWithLoss"
724 |   bottom: "cls_score2"
725 |   bottom: "labels"
726 |   propagate_down: 1
727 |   propagate_down: 0
728 |   top: "loss_cls"
729 |   loss_weight: 1
730 | }
731 | layer {
732 |   name: "loss_bbox"
733 |   type: "SmoothL1Loss"
734 |   bottom: "bbox_pred2"
735 |   bottom: "bbox_targets"
736 |   bottom: "bbox_inside_weights"
737 |   bottom: "bbox_outside_weights"
738 |   top: "loss_bbox"
739 |   loss_weight: 1
740 | }
741 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "../../models/intel_optimized_models/faster-rcnn/pascal_voc/VGG16/faster_rcnn_end2end_avs/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 50000
 6 | display: 20
 7 | average_loss: 100
 8 | # iter_size: 1  (disabled; the active setting is "iter_size: 2" at the end of this file)
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "vgg16_faster_rcnn"
16 | iter_size: 2
17 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/test.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_16_layers"
  2 | 
  3 | input: "data"
  4 | input_shape {
  5 |   dim: 1
  6 |   dim: 3
  7 |   dim: 224
  8 |   dim: 224
  9 | }
 10 | 
 11 | input: "im_info"
 12 | input_shape {
 13 |   dim: 1
 14 |   dim: 3
 15 | }
 16 | 
 17 | layer {
 18 |   name: "conv1_1"
 19 |   type: "Convolution"
 20 |   bottom: "data"
 21 |   top: "conv1_1"
 22 |   param {
 23 |     lr_mult: 0
 24 |     decay_mult: 0
 25 |   }
 26 |   param {
 27 |     lr_mult: 0
 28 |     decay_mult: 0
 29 |   }
 30 |   convolution_param {
 31 |     num_output: 64
 32 |     pad: 1
 33 |     kernel_size: 3
 34 |   }
 35 | }
 36 | layer {
 37 |   name: "relu1_1"
 38 |   type: "ReLU"
 39 |   bottom: "conv1_1"
 40 |   top: "conv1_1"
 41 | }
 42 | layer {
 43 |   name: "conv1_2"
 44 |   type: "Convolution"
 45 |   bottom: "conv1_1"
 46 |   top: "conv1_2"
 47 |   param {
 48 |     lr_mult: 0
 49 |     decay_mult: 0
 50 |   }
 51 |   param {
 52 |     lr_mult: 0
 53 |     decay_mult: 0
 54 |   }
 55 |   convolution_param {
 56 |     num_output: 64
 57 |     pad: 1
 58 |     kernel_size: 3
 59 |   }
 60 | }
 61 | layer {
 62 |   name: "relu1_2"
 63 |   type: "ReLU"
 64 |   bottom: "conv1_2"
 65 |   top: "conv1_2"
 66 | }
 67 | layer {
 68 |   name: "pool1"
 69 |   type: "Pooling"
 70 |   bottom: "conv1_2"
 71 |   top: "pool1"
 72 |   pooling_param {
 73 |     pool: MAX
 74 |     kernel_size: 2
 75 |     stride: 2
 76 |   }
 77 | }
 78 | layer {
 79 |   name: "conv2_1"
 80 |   type: "Convolution"
 81 |   bottom: "pool1"
 82 |   top: "conv2_1"
 83 |   param {
 84 |     lr_mult: 0
 85 |     decay_mult: 0
 86 |   }
 87 |   param {
 88 |     lr_mult: 0
 89 |     decay_mult: 0
 90 |   }
 91 |   convolution_param {
 92 |     num_output: 128
 93 |     pad: 1
 94 |     kernel_size: 3
 95 |   }
 96 | }
 97 | layer {
 98 |   name: "relu2_1"
 99 |   type: "ReLU"
100 |   bottom: "conv2_1"
101 |   top: "conv2_1"
102 | }
103 | layer {
104 |   name: "conv2_2"
105 |   type: "Convolution"
106 |   bottom: "conv2_1"
107 |   top: "conv2_2"
108 |   param {
109 |     lr_mult: 0
110 |     decay_mult: 0
111 |   }
112 |   param {
113 |     lr_mult: 0
114 |     decay_mult: 0
115 |   }
116 |   convolution_param {
117 |     num_output: 128
118 |     pad: 1
119 |     kernel_size: 3
120 |   }
121 | }
122 | layer {
123 |   name: "relu2_2"
124 |   type: "ReLU"
125 |   bottom: "conv2_2"
126 |   top: "conv2_2"
127 | }
128 | layer {
129 |   name: "pool2"
130 |   type: "Pooling"
131 |   bottom: "conv2_2"
132 |   top: "pool2"
133 |   pooling_param {
134 |     pool: MAX
135 |     kernel_size: 2
136 |     stride: 2
137 |   }
138 | }
139 | layer {
140 |   name: "conv3_1"  # conv3 stage: three 3x3 convolutions with 256 outputs each, then pool3
141 |   type: "Convolution"
142 |   bottom: "pool2"
143 |   top: "conv3_1"
144 |   param {  # first param spec: weights (Caffe convention)
145 |     lr_mult: 1
146 |     decay_mult: 1
147 |   }
148 |   param {  # second param spec: bias, 2x learning rate and no weight decay
149 |     lr_mult: 2
150 |     decay_mult: 0
151 |   }
152 |   convolution_param {
153 |     num_output: 256
154 |     pad: 1  # pad 1 with a 3x3 kernel (default stride 1) keeps height/width
155 |     kernel_size: 3
156 |   }
157 | }
158 | layer {
159 |   name: "relu3_1"  # in-place ReLU on conv3_1
160 |   type: "ReLU"
161 |   bottom: "conv3_1"
162 |   top: "conv3_1"
163 | }
164 | layer {
165 |   name: "conv3_2"
166 |   type: "Convolution"
167 |   bottom: "conv3_1"
168 |   top: "conv3_2"
169 |   param {  # weights
170 |     lr_mult: 1
171 |     decay_mult: 1
172 |   }
173 |   param {  # bias
174 |     lr_mult: 2
175 |     decay_mult: 0
176 |   }
177 |   convolution_param {
178 |     num_output: 256
179 |     pad: 1
180 |     kernel_size: 3
181 |   }
182 | }
183 | layer {
184 |   name: "relu3_2"  # in-place ReLU on conv3_2
185 |   type: "ReLU"
186 |   bottom: "conv3_2"
187 |   top: "conv3_2"
188 | }
189 | layer {
190 |   name: "conv3_3"
191 |   type: "Convolution"
192 |   bottom: "conv3_2"
193 |   top: "conv3_3"
194 |   param {  # weights
195 |     lr_mult: 1
196 |     decay_mult: 1
197 |   }
198 |   param {  # bias
199 |     lr_mult: 2
200 |     decay_mult: 0
201 |   }
202 |   convolution_param {
203 |     num_output: 256
204 |     pad: 1
205 |     kernel_size: 3
206 |   }
207 | }
208 | layer {
209 |   name: "relu3_3"  # in-place ReLU on conv3_3
210 |   type: "ReLU"
211 |   bottom: "conv3_3"
212 |   top: "conv3_3"
213 | }
214 | layer {
215 |   name: "pool3"  # 2x2 max pooling with stride 2 over conv3_3
216 |   type: "Pooling"
217 |   bottom: "conv3_3"
218 |   top: "pool3"
219 |   pooling_param {
220 |     pool: MAX
221 |     kernel_size: 2
222 |     stride: 2
223 |   }
224 | }
225 | layer {
226 |   name: "conv4_1"  # conv4 stage: three 3x3 convolutions with 512 outputs each, then pool4
227 |   type: "Convolution"
228 |   bottom: "pool3"
229 |   top: "conv4_1"
230 |   param {  # weights
231 |     lr_mult: 1
232 |     decay_mult: 1
233 |   }
234 |   param {  # bias: 2x learning rate, no weight decay
235 |     lr_mult: 2
236 |     decay_mult: 0
237 |   }
238 |   convolution_param {
239 |     num_output: 512
240 |     pad: 1
241 |     kernel_size: 3
242 |   }
243 | }
244 | layer {
245 |   name: "relu4_1"  # in-place ReLU on conv4_1
246 |   type: "ReLU"
247 |   bottom: "conv4_1"
248 |   top: "conv4_1"
249 | }
250 | layer {
251 |   name: "conv4_2"
252 |   type: "Convolution"
253 |   bottom: "conv4_1"
254 |   top: "conv4_2"
255 |   param {  # weights
256 |     lr_mult: 1
257 |     decay_mult: 1
258 |   }
259 |   param {  # bias
260 |     lr_mult: 2
261 |     decay_mult: 0
262 |   }
263 |   convolution_param {
264 |     num_output: 512
265 |     pad: 1
266 |     kernel_size: 3
267 |   }
268 | }
269 | layer {
270 |   name: "relu4_2"  # in-place ReLU on conv4_2
271 |   type: "ReLU"
272 |   bottom: "conv4_2"
273 |   top: "conv4_2"
274 | }
275 | layer {
276 |   name: "conv4_3"
277 |   type: "Convolution"
278 |   bottom: "conv4_2"
279 |   top: "conv4_3"
280 |   param {  # weights
281 |     lr_mult: 1
282 |     decay_mult: 1
283 |   }
284 |   param {  # bias
285 |     lr_mult: 2
286 |     decay_mult: 0
287 |   }
288 |   convolution_param {
289 |     num_output: 512
290 |     pad: 1
291 |     kernel_size: 3
292 |   }
293 | }
294 | layer {
295 |   name: "relu4_3"  # in-place ReLU on conv4_3
296 |   type: "ReLU"
297 |   bottom: "conv4_3"
298 |   top: "conv4_3"
299 | }
300 | layer {
301 |   name: "pool4"  # 2x2 max pooling with stride 2 over conv4_3
302 |   type: "Pooling"
303 |   bottom: "conv4_3"
304 |   top: "pool4"
305 |   pooling_param {
306 |     pool: MAX
307 |     kernel_size: 2
308 |     stride: 2
309 |   }
310 | }
311 | layer {
312 |   name: "conv5_1"  # conv5 stage: three 3x3 convolutions with 512 outputs each; no pooling follows
313 |   type: "Convolution"
314 |   bottom: "pool4"
315 |   top: "conv5_1"
316 |   param {  # weights
317 |     lr_mult: 1
318 |     decay_mult: 1
319 |   }
320 |   param {  # bias: 2x learning rate, no weight decay
321 |     lr_mult: 2
322 |     decay_mult: 0
323 |   }
324 |   convolution_param {
325 |     num_output: 512
326 |     pad: 1
327 |     kernel_size: 3
328 |   }
329 | }
330 | layer {
331 |   name: "relu5_1"  # in-place ReLU on conv5_1
332 |   type: "ReLU"
333 |   bottom: "conv5_1"
334 |   top: "conv5_1"
335 | }
336 | layer {
337 |   name: "conv5_2"
338 |   type: "Convolution"
339 |   bottom: "conv5_1"
340 |   top: "conv5_2"
341 |   param {  # weights
342 |     lr_mult: 1
343 |     decay_mult: 1
344 |   }
345 |   param {  # bias
346 |     lr_mult: 2
347 |     decay_mult: 0
348 |   }
349 |   convolution_param {
350 |     num_output: 512
351 |     pad: 1
352 |     kernel_size: 3
353 |   }
354 | }
355 | layer {
356 |   name: "relu5_2"  # in-place ReLU on conv5_2
357 |   type: "ReLU"
358 |   bottom: "conv5_2"
359 |   top: "conv5_2"
360 | }
361 | layer {
362 |   name: "conv5_3"  # output blob feeds both the RPN head and roi_pool5
363 |   type: "Convolution"
364 |   bottom: "conv5_2"
365 |   top: "conv5_3"
366 |   param {  # weights
367 |     lr_mult: 1
368 |     decay_mult: 1
369 |   }
370 |   param {  # bias
371 |     lr_mult: 2
372 |     decay_mult: 0
373 |   }
374 |   convolution_param {
375 |     num_output: 512
376 |     pad: 1
377 |     kernel_size: 3
378 |   }
379 | }
380 | layer {
381 |   name: "relu5_3"  # in-place ReLU on conv5_3
382 |   type: "ReLU"
383 |   bottom: "conv5_3"
384 |   top: "conv5_3"
385 | }
386 | 
387 | #========= RPN ============
388 | 
389 | layer {
390 |   name: "rpn_conv/3x3"  # shared 3x3 conv over conv5_3; its output feeds both RPN branches
391 |   type: "Convolution"
392 |   bottom: "conv5_3"
393 |   top: "rpn/output"
394 |   param { lr_mult: 1.0 decay_mult: 1.0 }  # weights
395 |   param { lr_mult: 2.0 decay_mult: 0 }  # bias
396 |   convolution_param {
397 |     num_output: 512
398 |     kernel_size: 3 pad: 1 stride: 1
399 |     weight_filler { type: "gaussian" std: 0.01 }
400 |     bias_filler { type: "constant" value: 0 }
401 |   }
402 | }
403 | layer {
404 |   name: "rpn_relu/3x3"  # in-place ReLU on rpn/output
405 |   type: "ReLU"
406 |   bottom: "rpn/output"
407 |   top: "rpn/output"
408 | }
409 | 
410 | layer {
411 |   name: "rpn_cls_score"  # 1x1 conv: objectness scores per anchor
412 |   type: "Convolution"
413 |   bottom: "rpn/output"
414 |   top: "rpn_cls_score"
415 |   param { lr_mult: 1.0 decay_mult: 1.0 }  # weights
416 |   param { lr_mult: 2.0 decay_mult: 0 }  # bias
417 |   convolution_param {
418 |     num_output: 18   # 2(bg/fg) * 9(anchors)
419 |     kernel_size: 1 pad: 0 stride: 1
420 |     weight_filler { type: "gaussian" std: 0.01 }
421 |     bias_filler { type: "constant" value: 0 }
422 |   }
423 | }
424 | layer {
425 |   name: "rpn_bbox_pred"  # 1x1 conv: box regression outputs per anchor
426 |   type: "Convolution"
427 |   bottom: "rpn/output"
428 |   top: "rpn_bbox_pred"
429 |   param { lr_mult: 1.0 decay_mult: 1.0 }  # weights
430 |   param { lr_mult: 2.0 decay_mult: 0 }  # bias
431 |   convolution_param {
432 |     num_output: 36   # 4 * 9(anchors)
433 |     kernel_size: 1 pad: 0 stride: 1
434 |     weight_filler { type: "gaussian" std: 0.01 }
435 |     bias_filler { type: "constant" value: 0 }
436 |   }
437 | }
438 | layer {
439 |    bottom: "rpn_cls_score"
440 |    top: "rpn_cls_score_reshape"
441 |    name: "rpn_cls_score_reshape"  # fold the 18 channels into 2 so the softmax runs over bg/fg
442 |    type: "Reshape"
443 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # 0 = copy from bottom, -1 = inferred
444 | }
445 | 
446 | #========= RoI Proposal ============
447 | 
448 | layer {
449 |   name: "rpn_cls_prob"  # softmax over the 2-channel (bg/fg) axis produced by the reshape above
450 |   type: "Softmax"
451 |   bottom: "rpn_cls_score_reshape"
452 |   top: "rpn_cls_prob"
453 | }
454 | layer {
455 |   name: 'rpn_cls_prob_reshape'  # restore the 18-channel layout (2 * 9 anchors)
456 |   type: 'Reshape'
457 |   bottom: 'rpn_cls_prob'
458 |   top: 'rpn_cls_prob_reshape'
459 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # 0 = copy from bottom, -1 = inferred
460 | }
461 | layer {
462 |   name: 'proposal'  # Python layer (rpn.proposal_layer.ProposalLayer) emitting the 'rois' blob
463 |   type: 'Python'
464 |   bottom: 'rpn_cls_prob_reshape'
465 |   bottom: 'rpn_bbox_pred'
466 |   bottom: 'im_info'
467 |   top: 'rois'
468 |   python_param {
469 |     module: 'rpn.proposal_layer'
470 |     layer: 'ProposalLayer'
471 |     param_str: "'feat_stride': 16"  # same scale as roi_pool5's spatial_scale (1/16)
472 |   }
473 | }
474 | 
475 | #========= RCNN ============
476 | 
477 | layer {
478 |   name: "roi_pool5"  # pools each RoI on conv5_3 into a fixed 7x7 grid
479 |   type: "ROIPooling"
480 |   bottom: "conv5_3"
481 |   bottom: "rois"
482 |   top: "pool5"
483 |   roi_pooling_param {
484 |     pooled_w: 7
485 |     pooled_h: 7
486 |     spatial_scale: 0.0625 # 1/16
487 |   }
488 | }
489 | layer {
490 |   name: "fc6"  # fully connected, 4096 outputs
491 |   type: "InnerProduct"
492 |   bottom: "pool5"
493 |   top: "fc6"
494 |   param {  # weights
495 |     lr_mult: 1
496 |     decay_mult: 1
497 |   }
498 |   param {  # bias: 2x learning rate, no weight decay
499 |     lr_mult: 2
500 |     decay_mult: 0
501 |   }
502 |   inner_product_param {
503 |     num_output: 4096
504 |   }
505 | }
506 | layer {
507 |   name: "relu6"  # in-place ReLU on fc6
508 |   type: "ReLU"
509 |   bottom: "fc6"
510 |   top: "fc6"
511 | }
512 | layer {
513 |   name: "drop6"  # in-place dropout on fc6, ratio 0.5
514 |   type: "Dropout"
515 |   bottom: "fc6"
516 |   top: "fc6"
517 |   dropout_param {
518 |     dropout_ratio: 0.5
519 |   }
520 | }
521 | layer {
522 |   name: "fc7"  # fully connected, 4096 outputs
523 |   type: "InnerProduct"
524 |   bottom: "fc6"
525 |   top: "fc7"
526 |   param {  # weights
527 |     lr_mult: 1
528 |     decay_mult: 1
529 |   }
530 |   param {  # bias
531 |     lr_mult: 2
532 |     decay_mult: 0
533 |   }
534 |   inner_product_param {
535 |     num_output: 4096
536 |   }
537 | }
538 | layer {
539 |   name: "relu7"  # in-place ReLU on fc7
540 |   type: "ReLU"
541 |   bottom: "fc7"
542 |   top: "fc7"
543 | }
544 | layer {
545 |   name: "drop7"  # in-place dropout on fc7, ratio 0.5
546 |   type: "Dropout"
547 |   bottom: "fc7"
548 |   top: "fc7"
549 |   dropout_param {
550 |     dropout_ratio: 0.5
551 |   }
552 | }
553 | layer {
554 |   name: "cls_score"  # per-RoI class scores; softmaxed by "cls_prob"
555 |   type: "InnerProduct"
556 |   bottom: "fc7"
557 |   top: "cls_score"
558 |   param {  # weights
559 |     lr_mult: 1
560 |     decay_mult: 1
561 |   }
562 |   param {  # bias
563 |     lr_mult: 2
564 |     decay_mult: 0
565 |   }
566 |   inner_product_param {
567 |     num_output: 6  # class count; must equal cls_score in the net that produced the weights (faster_rcnn_end2end_avs/train.prototxt uses 6)
568 |     weight_filler {
569 |       type: "gaussian"
570 |       std: 0.01
571 |     }
572 |     bias_filler {
573 |       type: "constant"
574 |       value: 0
575 |     }
576 |   }
577 | }
578 | layer {
579 |   name: "bbox_pred"  # per-RoI, per-class box regression outputs
580 |   type: "InnerProduct"
581 |   bottom: "fc7"
582 |   top: "bbox_pred"
583 |   param {  # weights
584 |     lr_mult: 1
585 |     decay_mult: 1
586 |   }
587 |   param {  # bias
588 |     lr_mult: 2
589 |     decay_mult: 0
590 |   }
591 |   inner_product_param {
592 |     num_output: 24  # 4 box deltas * 6 classes; keep equal to 4 * cls_score num_output and to bbox_pred in the training net
593 |     weight_filler {
594 |       type: "gaussian"
595 |       std: 0.001
596 |     }
597 |     bias_filler {
598 |       type: "constant"
599 |       value: 0
600 |     }
601 |   }
602 | }
603 | layer {
604 |   name: "cls_prob"  # softmax over cls_score; final per-RoI class probabilities
605 |   type: "Softmax"
606 |   bottom: "cls_score"
607 |   top: "cls_prob"
608 | }
609 | 


--------------------------------------------------------------------------------
/faster_rcnn_end2end_avs/train.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_16_layers"
  2 | layer {
  3 |   name: 'input-data'  # Python data layer (roi_data_layer.layer.RoIDataLayer)
  4 |   type: 'Python'
  5 |   top: 'data'
  6 |   top: 'im_info'
  7 |   top: 'gt_boxes'
  8 |   python_param {
  9 |     module: 'roi_data_layer.layer'
 10 |     layer: 'RoIDataLayer'
 11 |     param_str: "'num_classes': 6"  # same class count as cls_score num_output further down
 12 |   }
 13 | }
 14 | 
 15 | layer {
 16 |   name: "conv1_1"  # conv1/conv2 stages: every param spec has lr_mult 0, so the solver does not update them
 17 |   type: "Convolution"
 18 |   bottom: "data"
 19 |   top: "conv1_1"
 20 |   param {  # weights: frozen
 21 |     lr_mult: 0
 22 |     decay_mult: 0
 23 |   }
 24 |   param {  # bias: frozen
 25 |     lr_mult: 0
 26 |     decay_mult: 0
 27 |   }
 28 |   convolution_param {
 29 |     num_output: 64
 30 |     pad: 1  # pad 1 with a 3x3 kernel (default stride 1) keeps height/width
 31 |     kernel_size: 3
 32 |   }
 33 | }
 34 | layer {
 35 |   name: "relu1_1"  # in-place ReLU on conv1_1
 36 |   type: "ReLU"
 37 |   bottom: "conv1_1"
 38 |   top: "conv1_1"
 39 | }
 40 | layer {
 41 |   name: "conv1_2"
 42 |   type: "Convolution"
 43 |   bottom: "conv1_1"
 44 |   top: "conv1_2"
 45 |   param {  # weights: frozen
 46 |     lr_mult: 0
 47 |     decay_mult: 0
 48 |   }
 49 |   param {  # bias: frozen
 50 |     lr_mult: 0
 51 |     decay_mult: 0
 52 |   }
 53 |   convolution_param {
 54 |     num_output: 64
 55 |     pad: 1
 56 |     kernel_size: 3
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu1_2"  # in-place ReLU on conv1_2
 61 |   type: "ReLU"
 62 |   bottom: "conv1_2"
 63 |   top: "conv1_2"
 64 | }
 65 | layer {
 66 |   name: "pool1"  # 2x2 max pooling with stride 2 (first of four; 2^4 = 16 matches feat_stride)
 67 |   type: "Pooling"
 68 |   bottom: "conv1_2"
 69 |   top: "pool1"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 2
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "conv2_1"
 78 |   type: "Convolution"
 79 |   bottom: "pool1"
 80 |   top: "conv2_1"
 81 |   param {  # weights: frozen
 82 |     lr_mult: 0
 83 |     decay_mult: 0
 84 |   }
 85 |   param {  # bias: frozen
 86 |     lr_mult: 0
 87 |     decay_mult: 0
 88 |   }
 89 |   convolution_param {
 90 |     num_output: 128
 91 |     pad: 1
 92 |     kernel_size: 3
 93 |   }
 94 | }
 95 | layer {
 96 |   name: "relu2_1"  # in-place ReLU on conv2_1
 97 |   type: "ReLU"
 98 |   bottom: "conv2_1"
 99 |   top: "conv2_1"
100 | }
101 | layer {
102 |   name: "conv2_2"
103 |   type: "Convolution"
104 |   bottom: "conv2_1"
105 |   top: "conv2_2"
106 |   param {  # weights: frozen
107 |     lr_mult: 0
108 |     decay_mult: 0
109 |   }
110 |   param {  # bias: frozen
111 |     lr_mult: 0
112 |     decay_mult: 0
113 |   }
114 |   convolution_param {
115 |     num_output: 128
116 |     pad: 1
117 |     kernel_size: 3
118 |   }
119 | }
120 | layer {
121 |   name: "relu2_2"  # in-place ReLU on conv2_2
122 |   type: "ReLU"
123 |   bottom: "conv2_2"
124 |   top: "conv2_2"
125 | }
126 | layer {
127 |   name: "pool2"  # 2x2 max pooling with stride 2
128 |   type: "Pooling"
129 |   bottom: "conv2_2"
130 |   top: "pool2"
131 |   pooling_param {
132 |     pool: MAX
133 |     kernel_size: 2
134 |     stride: 2
135 |   }
136 | }
137 | layer {
138 |   name: "conv3_1"  # first trainable stage: three 3x3 convolutions with 256 outputs, then pool3
139 |   type: "Convolution"
140 |   bottom: "pool2"
141 |   top: "conv3_1"
142 |   param {  # weights; decay_mult is not set, so Caffe's ParamSpec default applies
143 |     lr_mult: 1
144 |   }
145 |   param {  # bias: 2x learning rate; decay_mult likewise left at its default
146 |     lr_mult: 2
147 |   }
148 |   convolution_param {
149 |     num_output: 256
150 |     pad: 1
151 |     kernel_size: 3
152 |   }
153 | }
154 | layer {
155 |   name: "relu3_1"  # in-place ReLU on conv3_1
156 |   type: "ReLU"
157 |   bottom: "conv3_1"
158 |   top: "conv3_1"
159 | }
160 | layer {
161 |   name: "conv3_2"
162 |   type: "Convolution"
163 |   bottom: "conv3_1"
164 |   top: "conv3_2"
165 |   param {  # weights
166 |     lr_mult: 1
167 |   }
168 |   param {  # bias
169 |     lr_mult: 2
170 |   }
171 |   convolution_param {
172 |     num_output: 256
173 |     pad: 1
174 |     kernel_size: 3
175 |   }
176 | }
177 | layer {
178 |   name: "relu3_2"  # in-place ReLU on conv3_2
179 |   type: "ReLU"
180 |   bottom: "conv3_2"
181 |   top: "conv3_2"
182 | }
183 | layer {
184 |   name: "conv3_3"
185 |   type: "Convolution"
186 |   bottom: "conv3_2"
187 |   top: "conv3_3"
188 |   param {  # weights
189 |     lr_mult: 1
190 |   }
191 |   param {  # bias
192 |     lr_mult: 2
193 |   }
194 |   convolution_param {
195 |     num_output: 256
196 |     pad: 1
197 |     kernel_size: 3
198 |   }
199 | }
200 | layer {
201 |   name: "relu3_3"  # in-place ReLU on conv3_3
202 |   type: "ReLU"
203 |   bottom: "conv3_3"
204 |   top: "conv3_3"
205 | }
206 | layer {
207 |   name: "pool3"  # 2x2 max pooling with stride 2
208 |   type: "Pooling"
209 |   bottom: "conv3_3"
210 |   top: "pool3"
211 |   pooling_param {
212 |     pool: MAX
213 |     kernel_size: 2
214 |     stride: 2
215 |   }
216 | }
217 | layer {
218 |   name: "conv4_1"  # conv4 stage: three 3x3 convolutions with 512 outputs, then pool4
219 |   type: "Convolution"
220 |   bottom: "pool3"
221 |   top: "conv4_1"
222 |   param {  # weights
223 |     lr_mult: 1
224 |   }
225 |   param {  # bias: 2x learning rate
226 |     lr_mult: 2
227 |   }
228 |   convolution_param {
229 |     num_output: 512
230 |     pad: 1
231 |     kernel_size: 3
232 |   }
233 | }
234 | layer {
235 |   name: "relu4_1"  # in-place ReLU on conv4_1
236 |   type: "ReLU"
237 |   bottom: "conv4_1"
238 |   top: "conv4_1"
239 | }
240 | layer {
241 |   name: "conv4_2"
242 |   type: "Convolution"
243 |   bottom: "conv4_1"
244 |   top: "conv4_2"
245 |   param {  # weights
246 |     lr_mult: 1
247 |   }
248 |   param {  # bias
249 |     lr_mult: 2
250 |   }
251 |   convolution_param {
252 |     num_output: 512
253 |     pad: 1
254 |     kernel_size: 3
255 |   }
256 | }
257 | layer {
258 |   name: "relu4_2"  # in-place ReLU on conv4_2
259 |   type: "ReLU"
260 |   bottom: "conv4_2"
261 |   top: "conv4_2"
262 | }
263 | layer {
264 |   name: "conv4_3"
265 |   type: "Convolution"
266 |   bottom: "conv4_2"
267 |   top: "conv4_3"
268 |   param {  # weights
269 |     lr_mult: 1
270 |   }
271 |   param {  # bias
272 |     lr_mult: 2
273 |   }
274 |   convolution_param {
275 |     num_output: 512
276 |     pad: 1
277 |     kernel_size: 3
278 |   }
279 | }
280 | layer {
281 |   name: "relu4_3"  # in-place ReLU on conv4_3
282 |   type: "ReLU"
283 |   bottom: "conv4_3"
284 |   top: "conv4_3"
285 | }
286 | layer {
287 |   name: "pool4"  # 2x2 max pooling with stride 2 (last pooling before the RPN / RoI pooling)
288 |   type: "Pooling"
289 |   bottom: "conv4_3"
290 |   top: "pool4"
291 |   pooling_param {
292 |     pool: MAX
293 |     kernel_size: 2
294 |     stride: 2
295 |   }
296 | }
297 | layer {
298 |   name: "conv5_1"  # conv5 stage: three 3x3 convolutions with 512 outputs; no pooling follows
299 |   type: "Convolution"
300 |   bottom: "pool4"
301 |   top: "conv5_1"
302 |   param {  # weights
303 |     lr_mult: 1
304 |   }
305 |   param {  # bias: 2x learning rate
306 |     lr_mult: 2
307 |   }
308 |   convolution_param {
309 |     num_output: 512
310 |     pad: 1
311 |     kernel_size: 3
312 |   }
313 | }
314 | layer {
315 |   name: "relu5_1"  # in-place ReLU on conv5_1
316 |   type: "ReLU"
317 |   bottom: "conv5_1"
318 |   top: "conv5_1"
319 | }
320 | layer {
321 |   name: "conv5_2"
322 |   type: "Convolution"
323 |   bottom: "conv5_1"
324 |   top: "conv5_2"
325 |   param {  # weights
326 |     lr_mult: 1
327 |   }
328 |   param {  # bias
329 |     lr_mult: 2
330 |   }
331 |   convolution_param {
332 |     num_output: 512
333 |     pad: 1
334 |     kernel_size: 3
335 |   }
336 | }
337 | layer {
338 |   name: "relu5_2"  # in-place ReLU on conv5_2
339 |   type: "ReLU"
340 |   bottom: "conv5_2"
341 |   top: "conv5_2"
342 | }
343 | layer {
344 |   name: "conv5_3"  # output blob feeds both the RPN head and roi_pool5
345 |   type: "Convolution"
346 |   bottom: "conv5_2"
347 |   top: "conv5_3"
348 |   param {  # weights
349 |     lr_mult: 1
350 |   }
351 |   param {  # bias
352 |     lr_mult: 2
353 |   }
354 |   convolution_param {
355 |     num_output: 512
356 |     pad: 1
357 |     kernel_size: 3
358 |   }
359 | }
360 | layer {
361 |   name: "relu5_3"  # in-place ReLU on conv5_3
362 |   type: "ReLU"
363 |   bottom: "conv5_3"
364 |   top: "conv5_3"
365 | }
366 | 
367 | #========= RPN ============
368 | 
369 | layer {
370 |   name: "rpn_conv/3x3"  # shared 3x3 conv over conv5_3; its output feeds both RPN branches
371 |   type: "Convolution"
372 |   bottom: "conv5_3"
373 |   top: "rpn/output"
374 |   param { lr_mult: 1.0 }  # weights
375 |   param { lr_mult: 2.0 }  # bias
376 |   convolution_param {
377 |     num_output: 512
378 |     kernel_size: 3 pad: 1 stride: 1
379 |     weight_filler { type: "gaussian" std: 0.01 }
380 |     bias_filler { type: "constant" value: 0 }
381 |   }
382 | }
383 | layer {
384 |   name: "rpn_relu/3x3"  # in-place ReLU on rpn/output
385 |   type: "ReLU"
386 |   bottom: "rpn/output"
387 |   top: "rpn/output"
388 | }
389 | 
390 | layer {
391 |   name: "rpn_cls_score"  # 1x1 conv: objectness scores per anchor
392 |   type: "Convolution"
393 |   bottom: "rpn/output"
394 |   top: "rpn_cls_score"
395 |   param { lr_mult: 1.0 }  # weights
396 |   param { lr_mult: 2.0 }  # bias
397 |   convolution_param {
398 |     num_output: 18   # 2(bg/fg) * 9(anchors)
399 |     kernel_size: 1 pad: 0 stride: 1
400 |     weight_filler { type: "gaussian" std: 0.01 }
401 |     bias_filler { type: "constant" value: 0 }
402 |   }
403 | }
404 | 
405 | layer {
406 |   name: "rpn_bbox_pred"  # 1x1 conv: box regression outputs per anchor
407 |   type: "Convolution"
408 |   bottom: "rpn/output"
409 |   top: "rpn_bbox_pred"
410 |   param { lr_mult: 1.0 }  # weights
411 |   param { lr_mult: 2.0 }  # bias
412 |   convolution_param {
413 |     num_output: 36   # 4 * 9(anchors)
414 |     kernel_size: 1 pad: 0 stride: 1
415 |     weight_filler { type: "gaussian" std: 0.01 }
416 |     bias_filler { type: "constant" value: 0 }
417 |   }
418 | }
419 | 
420 | layer {
421 |    bottom: "rpn_cls_score"
422 |    top: "rpn_cls_score_reshape"
423 |    name: "rpn_cls_score_reshape"  # fold the 18 channels into 2 so loss/softmax run over bg/fg
424 |    type: "Reshape"
425 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # 0 = copy from bottom, -1 = inferred
426 | }
427 | 
428 | layer {
429 |   name: 'rpn-data'  # Python layer (rpn.anchor_target_layer.AnchorTargetLayer) producing RPN training targets
430 |   type: 'Python'
431 |   bottom: 'rpn_cls_score'
432 |   bottom: 'gt_boxes'
433 |   bottom: 'im_info'
434 |   bottom: 'data'
435 |   top: 'rpn_labels'
436 |   top: 'rpn_bbox_targets'
437 |   top: 'rpn_bbox_inside_weights'
438 |   top: 'rpn_bbox_outside_weights'
439 |   python_param {
440 |     module: 'rpn.anchor_target_layer'
441 |     layer: 'AnchorTargetLayer'
442 |     param_str: "'feat_stride': 16"  # matches the four stride-2 poolings (pool1..pool4)
443 |   }
444 | }
445 | 
446 | layer {
447 |   name: "rpn_loss_cls"  # softmax loss on the reshaped bg/fg scores
448 |   type: "SoftmaxWithLoss"
449 |   bottom: "rpn_cls_score_reshape"
450 |   bottom: "rpn_labels"
451 |   propagate_down: 1  # backprop into the scores
452 |   propagate_down: 0  # no backprop into the labels
453 |   top: "rpn_cls_loss"
454 |   loss_weight: 1
455 |   loss_param {
456 |     ignore_label: -1  # entries labelled -1 are excluded from the loss
457 |     normalize: true
458 |   }
459 | }
460 | 
461 | layer {
462 |   name: "rpn_loss_bbox"  # smooth-L1 loss on RPN box regression, weighted by the inside/outside weight blobs
463 |   type: "SmoothL1Loss"
464 |   bottom: "rpn_bbox_pred"
465 |   bottom: "rpn_bbox_targets"
466 |   bottom: 'rpn_bbox_inside_weights'
467 |   bottom: 'rpn_bbox_outside_weights'
468 |   top: "rpn_loss_bbox"
469 |   loss_weight: 1
470 |   smooth_l1_loss_param { sigma: 3.0 }
471 | }
472 | 
473 | #========= RoI Proposal ============
474 | 
475 | layer {
476 |   name: "rpn_cls_prob"  # softmax over the 2-channel (bg/fg) axis produced by rpn_cls_score_reshape
477 |   type: "Softmax"
478 |   bottom: "rpn_cls_score_reshape"
479 |   top: "rpn_cls_prob"
480 | }
481 | 
482 | layer {
483 |   name: 'rpn_cls_prob_reshape'  # restore the 18-channel layout (2 * 9 anchors)
484 |   type: 'Reshape'
485 |   bottom: 'rpn_cls_prob'
486 |   top: 'rpn_cls_prob_reshape'
487 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # 0 = copy from bottom, -1 = inferred
488 | }
489 | 
490 | layer {
491 |   name: 'proposal'  # Python layer (rpn.proposal_layer.ProposalLayer); 'rpn_rois' is consumed by 'roi-data'
492 |   type: 'Python'
493 |   bottom: 'rpn_cls_prob_reshape'
494 |   bottom: 'rpn_bbox_pred'
495 |   bottom: 'im_info'
496 |   top: 'rpn_rois'
497 | #  top: 'rpn_scores'
498 |   python_param {
499 |     module: 'rpn.proposal_layer'
500 |     layer: 'ProposalLayer'
501 |     param_str: "'feat_stride': 16"  # same stride as 'rpn-data' and roi_pool5's spatial_scale (1/16)
502 |   }
503 | }
504 | 
505 | #layer {
506 | #  name: 'debug-data'
507 | #  type: 'Python'
508 | #  bottom: 'data'
509 | #  bottom: 'rpn_rois'
510 | #  bottom: 'rpn_scores'
511 | #  python_param {
512 | #    module: 'rpn.debug_layer'
513 | #    layer: 'RPNDebugLayer'
514 | #  }
515 | #}
516 | 
517 | layer {
518 |   name: 'roi-data'  # Python layer (rpn.proposal_target_layer.ProposalTargetLayer): RoIs plus per-RoI training targets
519 |   type: 'Python'
520 |   bottom: 'rpn_rois'
521 |   bottom: 'gt_boxes'
522 |   top: 'rois'
523 |   top: 'labels'
524 |   top: 'bbox_targets'
525 |   top: 'bbox_inside_weights'
526 |   top: 'bbox_outside_weights'
527 |   python_param {
528 |     module: 'rpn.proposal_target_layer'
529 |     layer: 'ProposalTargetLayer'
530 |     param_str: "'num_classes': 6"  # keep equal to 'input-data' num_classes and cls_score num_output
531 |   }
532 | }
533 | 
534 | #========= RCNN ============
535 | 
536 | layer {
537 |   name: "roi_pool5"  # pools each RoI on conv5_3 into a fixed 7x7 grid
538 |   type: "ROIPooling"
539 |   bottom: "conv5_3"
540 |   bottom: "rois"
541 |   top: "pool5"
542 |   roi_pooling_param {
543 |     pooled_w: 7
544 |     pooled_h: 7
545 |     spatial_scale: 0.0625 # 1/16
546 |   }
547 | }
548 | layer {
549 |   name: "fc6"  # fully connected, 4096 outputs
550 |   type: "InnerProduct"
551 |   bottom: "pool5"
552 |   top: "fc6"
553 |   param {  # weights
554 |     lr_mult: 1
555 |   }
556 |   param {  # bias: 2x learning rate
557 |     lr_mult: 2
558 |   }
559 |   inner_product_param {
560 |     num_output: 4096
561 |   }
562 | }
563 | layer {
564 |   name: "relu6"  # in-place ReLU on fc6
565 |   type: "ReLU"
566 |   bottom: "fc6"
567 |   top: "fc6"
568 | }
569 | layer {
570 |   name: "drop6"  # in-place dropout on fc6, ratio 0.5
571 |   type: "Dropout"
572 |   bottom: "fc6"
573 |   top: "fc6"
574 |   dropout_param {
575 |     dropout_ratio: 0.5
576 |   }
577 | }
578 | layer {
579 |   name: "fc7"  # fully connected, 4096 outputs
580 |   type: "InnerProduct"
581 |   bottom: "fc6"
582 |   top: "fc7"
583 |   param {  # weights
584 |     lr_mult: 1
585 |   }
586 |   param {  # bias
587 |     lr_mult: 2
588 |   }
589 |   inner_product_param {
590 |     num_output: 4096
591 |   }
592 | }
593 | layer {
594 |   name: "relu7"  # in-place ReLU on fc7
595 |   type: "ReLU"
596 |   bottom: "fc7"
597 |   top: "fc7"
598 | }
599 | layer {
600 |   name: "drop7"  # in-place dropout on fc7, ratio 0.5
601 |   type: "Dropout"
602 |   bottom: "fc7"
603 |   top: "fc7"
604 |   dropout_param {
605 |     dropout_ratio: 0.5
606 |   }
607 | }
608 | layer {
609 |   name: "cls_score"  # per-RoI class scores, consumed by loss_cls
610 |   type: "InnerProduct"
611 |   bottom: "fc7"
612 |   top: "cls_score"
613 |   param {  # weights
614 |     lr_mult: 1
615 |   }
616 |   param {  # bias
617 |     lr_mult: 2
618 |   }
619 |   inner_product_param {
620 |     num_output: 6  # one score per class; same count as 'input-data' and 'roi-data' num_classes
621 |     weight_filler {
622 |       type: "gaussian"
623 |       std: 0.01
624 |     }
625 |     bias_filler {
626 |       type: "constant"
627 |       value: 0
628 |     }
629 |   }
630 | }
631 | layer {
632 |   name: "bbox_pred"  # per-RoI, per-class box regression outputs, consumed by loss_bbox
633 |   type: "InnerProduct"
634 |   bottom: "fc7"
635 |   top: "bbox_pred"
636 |   param {  # weights
637 |     lr_mult: 1
638 |   }
639 |   param {  # bias
640 |     lr_mult: 2
641 |   }
642 |   inner_product_param {
643 |     num_output: 24  # 4 box deltas * 6 classes; must equal the width of 'bbox_targets' from 'roi-data'
644 |     weight_filler {
645 |       type: "gaussian"
646 |       std: 0.001
647 |     }
648 |     bias_filler {
649 |       type: "constant"
650 |       value: 0
651 |     }
652 |   }
653 | }
654 | layer {
655 |   name: "loss_cls"  # softmax classification loss over cls_score against 'labels' from 'roi-data'
656 |   type: "SoftmaxWithLoss"
657 |   bottom: "cls_score"
658 |   bottom: "labels"
659 |   propagate_down: 1  # backprop into the scores
660 |   propagate_down: 0  # no backprop into the labels
661 |   top: "loss_cls"
662 |   loss_weight: 1
663 | }
664 | layer {
665 |   name: "loss_bbox"  # smooth-L1 box regression loss; bbox_pred and bbox_targets must have the same width
666 |   type: "SmoothL1Loss"
667 |   bottom: "bbox_pred"
668 |   bottom: "bbox_targets"
669 |   bottom: "bbox_inside_weights"
670 |   bottom: "bbox_outside_weights"
671 |   top: "loss_bbox"
672 |   loss_weight: 1
673 | }
674 | 


--------------------------------------------------------------------------------