├── .gitattributes ├── .gitignore ├── MobileNet-YOLO ├── mobilenet_yolov3_deploy.prototxt └── mobilenet_yolov3_deploy_iter_63000.caffemodel ├── README.md ├── SSD ├── MobileNet │ ├── MobileNetSSD_deploy.prototxt │ ├── MobileNetSSD_deploy2.prototxt │ ├── MobileNetSSD_deploy_custom.prototxt │ ├── solver.prototxt │ ├── solver_test.prototxt │ ├── test.prototxt │ └── train.prototxt └── MobileNet_V2 │ ├── deploy.prototxt │ ├── solver.prototxt │ └── train.prototxt ├── YOLO ├── voc.data ├── voc.names ├── yolov3-tiny.cfg ├── yolov3-tiny_final.weights └── yolov3.cfg └── faster_rcnn_end2end_avs ├── VGG19 └── faster_rcnn_end2end │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── solver.prototxt ├── test.prototxt └── train.prototxt /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is needed for earlier builds of msysgit that do not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. 
An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## General 2 | 3 | # Models and weights 4 | 5 | #*.caffemodel 6 | *.solverstate 7 | *.py 8 | -------------------------------------------------------------------------------- /MobileNet-YOLO/mobilenet_yolov3_deploy_iter_63000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Vehicle-Detection/caedb24b289b1c4774b85ecc15f60cf6b040bec6/MobileNet-YOLO/mobilenet_yolov3_deploy_iter_63000.caffemodel -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vehicle Detection 2 | 3 | ## Comparing different models on the same dataset 4 | 5 | 1. [MobileNet-YOLO](https://github.com/eric612/MobileNet-YOLO) 6 | 7 | 2. [YoloV3](https://pjreddie.com/darknet/yolo/) 8 | 9 | 3. [FasterRCNN](https://github.com/intel/caffe) 10 | 11 | 4. [MobileNet(V2) SSD](https://github.com/eric612/MobileNet-SSD-windows) 12 | 13 | ### New !! 
Detection and Segmentation 14 | 15 | Detection and segmentation in a one-stage, end-to-end [model](http://ethereon.github.io/netscope/#/gist/4e0f9e4840f0cced223ee60c31772b68) 16 | 17 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/rHMW8gkbe6s/0.jpg)](https://www.youtube.com/watch?v=rHMW8gkbe6s) 18 | 19 | [Training project](https://github.com/eric612/MobileNet-YOLO/blob/detection_with_segmentation/README.md) 20 | 21 | ### MobileNet-YOLO Result 22 | 23 | [Run on Linux](https://github.com/eric612/MobileNet-YOLO) 24 | 25 | [Run on Windows](https://github.com/eric612/Caffe-YOLOv2-Windows) 26 | 27 | [Models and Weights](/MobileNet-YOLO) 28 | 29 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/moW84z9zKOg/0.jpg)](https://www.youtube.com/watch?v=moW84z9zKOg) 30 | 31 | ### YOLOv3 Tiny Result 32 | 33 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/rA5nHltRGRE/0.jpg)](https://www.youtube.com/watch?v=rA5nHltRGRE) 34 | 35 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/KSsnJuSrWMw/0.jpg)](https://www.youtube.com/watch?v=KSsnJuSrWMw) 36 | 37 | ### YOLOv3 Tiny Model 38 | 39 | [weights](/YOLO/yolov3-tiny_final.weights) 40 | 41 | [model](/YOLO/yolov3-tiny.cfg) 42 | 43 | ### YOLOv3-416x416-full Result 44 | 45 | [![YOLOv3-full](https://img.youtube.com/vi/YYz58loXJU0/0.jpg)](https://www.youtube.com/watch?v=YYz58loXJU0) 46 | 47 | ### YOLOv3-416x416-full Model 48 | 49 | [weights](https://drive.google.com/open?id=1BIBiZmTQOuqV2yQ1S67FSMCzRTmzThon) 50 | 51 | [model](/YOLO/yolov3.cfg) 52 | 53 | ### MobileNetSSD Model 54 | 55 | [weights](https://drive.google.com/open?id=1LbLSTPFSlHML5qAUYN-kt1bw2HxvvNWS) 56 | 57 | [model](https://drive.google.com/open?id=1KOE5r-71FFWU0LZbpo9HMEUwM_RE1LHR) 58 | 59 | ### MobileNetSSD_V2 Model 60 | 61 | [weights](https://drive.google.com/open?id=1v5X4tCaMFa59cfS4Ksr1J-Fl40vKlfpl) 62 | 63 | [model](https://github.com/eric612/MobileNet-SSD-windows/blob/master/models/MobileNetV2/deploy.prototxt) 64 | 65 | ### MobileNetSSD Result 66 | 67 | 
[![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/jn6SOzT_wPA/0.jpg)](https://www.youtube.com/watch?v=jn6SOzT_wPA) 68 | 69 | ### MobileNetSSD_V2 Result 70 | 71 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/oc3tXxOoSH4/0.jpg)](https://www.youtube.com/watch?v=oc3tXxOoSH4) 72 | 73 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/dsdeU8B0CJE/0.jpg)](https://www.youtube.com/watch?v=dsdeU8B0CJE) 74 | 75 | ### FasterRCNN Model 76 | 77 | [VGG16](https://drive.google.com/open?id=1NQ9F74FTZnXM-hyuwYAoDBOYBjDSf5bp) 78 | 79 | [VGG19](https://drive.google.com/open?id=1FiSktKooiABZJB5UIun9tAmD5aTEAHxn) 80 | 81 | ### FasterRCNN Result 82 | 83 | #### VGG19 84 | 85 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/NhID_pNwgac/0.jpg)](https://www.youtube.com/watch?v=NhID_pNwgac) 86 | 87 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/AjnaTelt0KM/0.jpg)](https://www.youtube.com/watch?v=AjnaTelt0KM) 88 | 89 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/vxk77dicGAQ/0.jpg)](https://www.youtube.com/watch?v=vxk77dicGAQ) 90 | 91 | #### VGG16 92 | 93 | [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/xjIB9t1tLOg/0.jpg)](https://www.youtube.com/watch?v=xjIB9t1tLOg) 94 | 95 | 96 | 97 | ### Source Videos 98 | 99 | [street](https://drive.google.com/open?id=1Wkk_n_yXz0C8nuwPK_1B8bJVJf7vzWeB) 100 | 101 | [tunnel](https://drive.google.com/open?id=1TDdBmZDtm_02WCLM3ENvnNnm6jUmeJFu) 102 | 103 | [rear view](https://drive.google.com/open?id=1pJQ4F1jqUHywI9bw0r2f3R-xyJOghujm) 104 | 105 | [night 1](https://drive.google.com/open?id=1r2QyxgAy_dt_30rWZXcHG5TE0zltXdkY) 106 | 107 | [night 2](https://drive.google.com/open?id=11NKTl15IPMdowTZnVVW8_HAYSdpTeJU-) 108 | 109 | 110 | -------------------------------------------------------------------------------- /SSD/MobileNet/MobileNetSSD_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "MobileNet-SSD" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 300 7 | dim: 
300 8 | } 9 | layer { 10 | name: "conv0" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv0" 14 | param { 15 | lr_mult: 1.0 16 | decay_mult: 1.0 17 | } 18 | param { 19 | lr_mult: 2.0 20 | decay_mult: 0.0 21 | } 22 | convolution_param { 23 | num_output: 32 24 | pad: 1 25 | kernel_size: 3 26 | stride: 2 27 | weight_filler { 28 | type: "msra" 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.0 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv0/relu" 38 | type: "ReLU" 39 | bottom: "conv0" 40 | top: "conv0" 41 | } 42 | layer { 43 | name: "conv1/dw" 44 | type: "Convolution" 45 | bottom: "conv0" 46 | top: "conv1/dw" 47 | param { 48 | lr_mult: 1.0 49 | decay_mult: 1.0 50 | } 51 | param { 52 | lr_mult: 2.0 53 | decay_mult: 0.0 54 | } 55 | convolution_param { 56 | num_output: 32 57 | pad: 1 58 | kernel_size: 3 59 | group: 32 60 | engine: CAFFE 61 | weight_filler { 62 | type: "msra" 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0.0 67 | } 68 | } 69 | } 70 | layer { 71 | name: "conv1/dw/relu" 72 | type: "ReLU" 73 | bottom: "conv1/dw" 74 | top: "conv1/dw" 75 | } 76 | layer { 77 | name: "conv1" 78 | type: "Convolution" 79 | bottom: "conv1/dw" 80 | top: "conv1" 81 | param { 82 | lr_mult: 1.0 83 | decay_mult: 1.0 84 | } 85 | param { 86 | lr_mult: 2.0 87 | decay_mult: 0.0 88 | } 89 | convolution_param { 90 | num_output: 64 91 | kernel_size: 1 92 | weight_filler { 93 | type: "msra" 94 | } 95 | bias_filler { 96 | type: "constant" 97 | value: 0.0 98 | } 99 | } 100 | } 101 | layer { 102 | name: "conv1/relu" 103 | type: "ReLU" 104 | bottom: "conv1" 105 | top: "conv1" 106 | } 107 | layer { 108 | name: "conv2/dw" 109 | type: "Convolution" 110 | bottom: "conv1" 111 | top: "conv2/dw" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 0.0 119 | } 120 | convolution_param { 121 | num_output: 64 122 | pad: 1 123 | kernel_size: 3 124 | stride: 2 125 | group: 64 126 | engine: CAFFE 127 | 
weight_filler { 128 | type: "msra" 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 0.0 133 | } 134 | } 135 | } 136 | layer { 137 | name: "conv2/dw/relu" 138 | type: "ReLU" 139 | bottom: "conv2/dw" 140 | top: "conv2/dw" 141 | } 142 | layer { 143 | name: "conv2" 144 | type: "Convolution" 145 | bottom: "conv2/dw" 146 | top: "conv2" 147 | param { 148 | lr_mult: 1.0 149 | decay_mult: 1.0 150 | } 151 | param { 152 | lr_mult: 2.0 153 | decay_mult: 0.0 154 | } 155 | convolution_param { 156 | num_output: 128 157 | kernel_size: 1 158 | weight_filler { 159 | type: "msra" 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0.0 164 | } 165 | } 166 | } 167 | layer { 168 | name: "conv2/relu" 169 | type: "ReLU" 170 | bottom: "conv2" 171 | top: "conv2" 172 | } 173 | layer { 174 | name: "conv3/dw" 175 | type: "Convolution" 176 | bottom: "conv2" 177 | top: "conv3/dw" 178 | param { 179 | lr_mult: 1.0 180 | decay_mult: 1.0 181 | } 182 | param { 183 | lr_mult: 2.0 184 | decay_mult: 0.0 185 | } 186 | convolution_param { 187 | num_output: 128 188 | pad: 1 189 | kernel_size: 3 190 | group: 128 191 | engine: CAFFE 192 | weight_filler { 193 | type: "msra" 194 | } 195 | bias_filler { 196 | type: "constant" 197 | value: 0.0 198 | } 199 | } 200 | } 201 | layer { 202 | name: "conv3/dw/relu" 203 | type: "ReLU" 204 | bottom: "conv3/dw" 205 | top: "conv3/dw" 206 | } 207 | layer { 208 | name: "conv3" 209 | type: "Convolution" 210 | bottom: "conv3/dw" 211 | top: "conv3" 212 | param { 213 | lr_mult: 1.0 214 | decay_mult: 1.0 215 | } 216 | param { 217 | lr_mult: 2.0 218 | decay_mult: 0.0 219 | } 220 | convolution_param { 221 | num_output: 128 222 | kernel_size: 1 223 | weight_filler { 224 | type: "msra" 225 | } 226 | bias_filler { 227 | type: "constant" 228 | value: 0.0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "conv3/relu" 234 | type: "ReLU" 235 | bottom: "conv3" 236 | top: "conv3" 237 | } 238 | layer { 239 | name: "conv4/dw" 240 | type: "Convolution" 241 | bottom: 
"conv3" 242 | top: "conv4/dw" 243 | param { 244 | lr_mult: 1.0 245 | decay_mult: 1.0 246 | } 247 | param { 248 | lr_mult: 2.0 249 | decay_mult: 0.0 250 | } 251 | convolution_param { 252 | num_output: 128 253 | pad: 1 254 | kernel_size: 3 255 | stride: 2 256 | group: 128 257 | engine: CAFFE 258 | weight_filler { 259 | type: "msra" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0.0 264 | } 265 | } 266 | } 267 | layer { 268 | name: "conv4/dw/relu" 269 | type: "ReLU" 270 | bottom: "conv4/dw" 271 | top: "conv4/dw" 272 | } 273 | layer { 274 | name: "conv4" 275 | type: "Convolution" 276 | bottom: "conv4/dw" 277 | top: "conv4" 278 | param { 279 | lr_mult: 1.0 280 | decay_mult: 1.0 281 | } 282 | param { 283 | lr_mult: 2.0 284 | decay_mult: 0.0 285 | } 286 | convolution_param { 287 | num_output: 256 288 | kernel_size: 1 289 | weight_filler { 290 | type: "msra" 291 | } 292 | bias_filler { 293 | type: "constant" 294 | value: 0.0 295 | } 296 | } 297 | } 298 | layer { 299 | name: "conv4/relu" 300 | type: "ReLU" 301 | bottom: "conv4" 302 | top: "conv4" 303 | } 304 | layer { 305 | name: "conv5/dw" 306 | type: "Convolution" 307 | bottom: "conv4" 308 | top: "conv5/dw" 309 | param { 310 | lr_mult: 1.0 311 | decay_mult: 1.0 312 | } 313 | param { 314 | lr_mult: 2.0 315 | decay_mult: 0.0 316 | } 317 | convolution_param { 318 | num_output: 256 319 | pad: 1 320 | kernel_size: 3 321 | group: 256 322 | engine: CAFFE 323 | weight_filler { 324 | type: "msra" 325 | } 326 | bias_filler { 327 | type: "constant" 328 | value: 0.0 329 | } 330 | } 331 | } 332 | layer { 333 | name: "conv5/dw/relu" 334 | type: "ReLU" 335 | bottom: "conv5/dw" 336 | top: "conv5/dw" 337 | } 338 | layer { 339 | name: "conv5" 340 | type: "Convolution" 341 | bottom: "conv5/dw" 342 | top: "conv5" 343 | param { 344 | lr_mult: 1.0 345 | decay_mult: 1.0 346 | } 347 | param { 348 | lr_mult: 2.0 349 | decay_mult: 0.0 350 | } 351 | convolution_param { 352 | num_output: 256 353 | kernel_size: 1 354 | weight_filler 
{ 355 | type: "msra" 356 | } 357 | bias_filler { 358 | type: "constant" 359 | value: 0.0 360 | } 361 | } 362 | } 363 | layer { 364 | name: "conv5/relu" 365 | type: "ReLU" 366 | bottom: "conv5" 367 | top: "conv5" 368 | } 369 | layer { 370 | name: "conv6/dw" 371 | type: "Convolution" 372 | bottom: "conv5" 373 | top: "conv6/dw" 374 | param { 375 | lr_mult: 1.0 376 | decay_mult: 1.0 377 | } 378 | param { 379 | lr_mult: 2.0 380 | decay_mult: 0.0 381 | } 382 | convolution_param { 383 | num_output: 256 384 | pad: 1 385 | kernel_size: 3 386 | stride: 2 387 | group: 256 388 | engine: CAFFE 389 | weight_filler { 390 | type: "msra" 391 | } 392 | bias_filler { 393 | type: "constant" 394 | value: 0.0 395 | } 396 | } 397 | } 398 | layer { 399 | name: "conv6/dw/relu" 400 | type: "ReLU" 401 | bottom: "conv6/dw" 402 | top: "conv6/dw" 403 | } 404 | layer { 405 | name: "conv6" 406 | type: "Convolution" 407 | bottom: "conv6/dw" 408 | top: "conv6" 409 | param { 410 | lr_mult: 1.0 411 | decay_mult: 1.0 412 | } 413 | param { 414 | lr_mult: 2.0 415 | decay_mult: 0.0 416 | } 417 | convolution_param { 418 | num_output: 512 419 | kernel_size: 1 420 | weight_filler { 421 | type: "msra" 422 | } 423 | bias_filler { 424 | type: "constant" 425 | value: 0.0 426 | } 427 | } 428 | } 429 | layer { 430 | name: "conv6/relu" 431 | type: "ReLU" 432 | bottom: "conv6" 433 | top: "conv6" 434 | } 435 | layer { 436 | name: "conv7/dw" 437 | type: "Convolution" 438 | bottom: "conv6" 439 | top: "conv7/dw" 440 | param { 441 | lr_mult: 1.0 442 | decay_mult: 1.0 443 | } 444 | param { 445 | lr_mult: 2.0 446 | decay_mult: 0.0 447 | } 448 | convolution_param { 449 | num_output: 512 450 | pad: 1 451 | kernel_size: 3 452 | group: 512 453 | engine: CAFFE 454 | weight_filler { 455 | type: "msra" 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0.0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "conv7/dw/relu" 465 | type: "ReLU" 466 | bottom: "conv7/dw" 467 | top: "conv7/dw" 468 | } 469 | layer { 470 | 
name: "conv7" 471 | type: "Convolution" 472 | bottom: "conv7/dw" 473 | top: "conv7" 474 | param { 475 | lr_mult: 1.0 476 | decay_mult: 1.0 477 | } 478 | param { 479 | lr_mult: 2.0 480 | decay_mult: 0.0 481 | } 482 | convolution_param { 483 | num_output: 512 484 | kernel_size: 1 485 | weight_filler { 486 | type: "msra" 487 | } 488 | bias_filler { 489 | type: "constant" 490 | value: 0.0 491 | } 492 | } 493 | } 494 | layer { 495 | name: "conv7/relu" 496 | type: "ReLU" 497 | bottom: "conv7" 498 | top: "conv7" 499 | } 500 | layer { 501 | name: "conv8/dw" 502 | type: "Convolution" 503 | bottom: "conv7" 504 | top: "conv8/dw" 505 | param { 506 | lr_mult: 1.0 507 | decay_mult: 1.0 508 | } 509 | param { 510 | lr_mult: 2.0 511 | decay_mult: 0.0 512 | } 513 | convolution_param { 514 | num_output: 512 515 | pad: 1 516 | kernel_size: 3 517 | group: 512 518 | engine: CAFFE 519 | weight_filler { 520 | type: "msra" 521 | } 522 | bias_filler { 523 | type: "constant" 524 | value: 0.0 525 | } 526 | } 527 | } 528 | layer { 529 | name: "conv8/dw/relu" 530 | type: "ReLU" 531 | bottom: "conv8/dw" 532 | top: "conv8/dw" 533 | } 534 | layer { 535 | name: "conv8" 536 | type: "Convolution" 537 | bottom: "conv8/dw" 538 | top: "conv8" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | param { 544 | lr_mult: 2.0 545 | decay_mult: 0.0 546 | } 547 | convolution_param { 548 | num_output: 512 549 | kernel_size: 1 550 | weight_filler { 551 | type: "msra" 552 | } 553 | bias_filler { 554 | type: "constant" 555 | value: 0.0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "conv8/relu" 561 | type: "ReLU" 562 | bottom: "conv8" 563 | top: "conv8" 564 | } 565 | layer { 566 | name: "conv9/dw" 567 | type: "Convolution" 568 | bottom: "conv8" 569 | top: "conv9/dw" 570 | param { 571 | lr_mult: 1.0 572 | decay_mult: 1.0 573 | } 574 | param { 575 | lr_mult: 2.0 576 | decay_mult: 0.0 577 | } 578 | convolution_param { 579 | num_output: 512 580 | pad: 1 581 | kernel_size: 3 582 | group: 512 583 | 
engine: CAFFE 584 | weight_filler { 585 | type: "msra" 586 | } 587 | bias_filler { 588 | type: "constant" 589 | value: 0.0 590 | } 591 | } 592 | } 593 | layer { 594 | name: "conv9/dw/relu" 595 | type: "ReLU" 596 | bottom: "conv9/dw" 597 | top: "conv9/dw" 598 | } 599 | layer { 600 | name: "conv9" 601 | type: "Convolution" 602 | bottom: "conv9/dw" 603 | top: "conv9" 604 | param { 605 | lr_mult: 1.0 606 | decay_mult: 1.0 607 | } 608 | param { 609 | lr_mult: 2.0 610 | decay_mult: 0.0 611 | } 612 | convolution_param { 613 | num_output: 512 614 | kernel_size: 1 615 | weight_filler { 616 | type: "msra" 617 | } 618 | bias_filler { 619 | type: "constant" 620 | value: 0.0 621 | } 622 | } 623 | } 624 | layer { 625 | name: "conv9/relu" 626 | type: "ReLU" 627 | bottom: "conv9" 628 | top: "conv9" 629 | } 630 | layer { 631 | name: "conv10/dw" 632 | type: "Convolution" 633 | bottom: "conv9" 634 | top: "conv10/dw" 635 | param { 636 | lr_mult: 1.0 637 | decay_mult: 1.0 638 | } 639 | param { 640 | lr_mult: 2.0 641 | decay_mult: 0.0 642 | } 643 | convolution_param { 644 | num_output: 512 645 | pad: 1 646 | kernel_size: 3 647 | group: 512 648 | engine: CAFFE 649 | weight_filler { 650 | type: "msra" 651 | } 652 | bias_filler { 653 | type: "constant" 654 | value: 0.0 655 | } 656 | } 657 | } 658 | layer { 659 | name: "conv10/dw/relu" 660 | type: "ReLU" 661 | bottom: "conv10/dw" 662 | top: "conv10/dw" 663 | } 664 | layer { 665 | name: "conv10" 666 | type: "Convolution" 667 | bottom: "conv10/dw" 668 | top: "conv10" 669 | param { 670 | lr_mult: 1.0 671 | decay_mult: 1.0 672 | } 673 | param { 674 | lr_mult: 2.0 675 | decay_mult: 0.0 676 | } 677 | convolution_param { 678 | num_output: 512 679 | kernel_size: 1 680 | weight_filler { 681 | type: "msra" 682 | } 683 | bias_filler { 684 | type: "constant" 685 | value: 0.0 686 | } 687 | } 688 | } 689 | layer { 690 | name: "conv10/relu" 691 | type: "ReLU" 692 | bottom: "conv10" 693 | top: "conv10" 694 | } 695 | layer { 696 | name: "conv11/dw" 697 | 
type: "Convolution" 698 | bottom: "conv10" 699 | top: "conv11/dw" 700 | param { 701 | lr_mult: 1.0 702 | decay_mult: 1.0 703 | } 704 | param { 705 | lr_mult: 2.0 706 | decay_mult: 0.0 707 | } 708 | convolution_param { 709 | num_output: 512 710 | pad: 1 711 | kernel_size: 3 712 | group: 512 713 | engine: CAFFE 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0.0 720 | } 721 | } 722 | } 723 | layer { 724 | name: "conv11/dw/relu" 725 | type: "ReLU" 726 | bottom: "conv11/dw" 727 | top: "conv11/dw" 728 | } 729 | layer { 730 | name: "conv11" 731 | type: "Convolution" 732 | bottom: "conv11/dw" 733 | top: "conv11" 734 | param { 735 | lr_mult: 1.0 736 | decay_mult: 1.0 737 | } 738 | param { 739 | lr_mult: 2.0 740 | decay_mult: 0.0 741 | } 742 | convolution_param { 743 | num_output: 512 744 | kernel_size: 1 745 | weight_filler { 746 | type: "msra" 747 | } 748 | bias_filler { 749 | type: "constant" 750 | value: 0.0 751 | } 752 | } 753 | } 754 | layer { 755 | name: "conv11/relu" 756 | type: "ReLU" 757 | bottom: "conv11" 758 | top: "conv11" 759 | } 760 | layer { 761 | name: "conv12/dw" 762 | type: "Convolution" 763 | bottom: "conv11" 764 | top: "conv12/dw" 765 | param { 766 | lr_mult: 1.0 767 | decay_mult: 1.0 768 | } 769 | param { 770 | lr_mult: 2.0 771 | decay_mult: 0.0 772 | } 773 | convolution_param { 774 | num_output: 512 775 | pad: 1 776 | kernel_size: 3 777 | stride: 2 778 | group: 512 779 | engine: CAFFE 780 | weight_filler { 781 | type: "msra" 782 | } 783 | bias_filler { 784 | type: "constant" 785 | value: 0.0 786 | } 787 | } 788 | } 789 | layer { 790 | name: "conv12/dw/relu" 791 | type: "ReLU" 792 | bottom: "conv12/dw" 793 | top: "conv12/dw" 794 | } 795 | layer { 796 | name: "conv12" 797 | type: "Convolution" 798 | bottom: "conv12/dw" 799 | top: "conv12" 800 | param { 801 | lr_mult: 1.0 802 | decay_mult: 1.0 803 | } 804 | param { 805 | lr_mult: 2.0 806 | decay_mult: 0.0 807 | } 808 | convolution_param { 809 | 
num_output: 1024 810 | kernel_size: 1 811 | weight_filler { 812 | type: "msra" 813 | } 814 | bias_filler { 815 | type: "constant" 816 | value: 0.0 817 | } 818 | } 819 | } 820 | layer { 821 | name: "conv12/relu" 822 | type: "ReLU" 823 | bottom: "conv12" 824 | top: "conv12" 825 | } 826 | layer { 827 | name: "conv13/dw" 828 | type: "Convolution" 829 | bottom: "conv12" 830 | top: "conv13/dw" 831 | param { 832 | lr_mult: 1.0 833 | decay_mult: 1.0 834 | } 835 | param { 836 | lr_mult: 2.0 837 | decay_mult: 0.0 838 | } 839 | convolution_param { 840 | num_output: 1024 841 | pad: 1 842 | kernel_size: 3 843 | group: 1024 844 | engine: CAFFE 845 | weight_filler { 846 | type: "msra" 847 | } 848 | bias_filler { 849 | type: "constant" 850 | value: 0.0 851 | } 852 | } 853 | } 854 | layer { 855 | name: "conv13/dw/relu" 856 | type: "ReLU" 857 | bottom: "conv13/dw" 858 | top: "conv13/dw" 859 | } 860 | layer { 861 | name: "conv13" 862 | type: "Convolution" 863 | bottom: "conv13/dw" 864 | top: "conv13" 865 | param { 866 | lr_mult: 1.0 867 | decay_mult: 1.0 868 | } 869 | param { 870 | lr_mult: 2.0 871 | decay_mult: 0.0 872 | } 873 | convolution_param { 874 | num_output: 1024 875 | kernel_size: 1 876 | weight_filler { 877 | type: "msra" 878 | } 879 | bias_filler { 880 | type: "constant" 881 | value: 0.0 882 | } 883 | } 884 | } 885 | layer { 886 | name: "conv13/relu" 887 | type: "ReLU" 888 | bottom: "conv13" 889 | top: "conv13" 890 | } 891 | layer { 892 | name: "conv14_1" 893 | type: "Convolution" 894 | bottom: "conv13" 895 | top: "conv14_1" 896 | param { 897 | lr_mult: 1.0 898 | decay_mult: 1.0 899 | } 900 | param { 901 | lr_mult: 2.0 902 | decay_mult: 0.0 903 | } 904 | convolution_param { 905 | num_output: 256 906 | kernel_size: 1 907 | weight_filler { 908 | type: "msra" 909 | } 910 | bias_filler { 911 | type: "constant" 912 | value: 0.0 913 | } 914 | } 915 | } 916 | layer { 917 | name: "conv14_1/relu" 918 | type: "ReLU" 919 | bottom: "conv14_1" 920 | top: "conv14_1" 921 | } 922 | layer 
{ 923 | name: "conv14_2" 924 | type: "Convolution" 925 | bottom: "conv14_1" 926 | top: "conv14_2" 927 | param { 928 | lr_mult: 1.0 929 | decay_mult: 1.0 930 | } 931 | param { 932 | lr_mult: 2.0 933 | decay_mult: 0.0 934 | } 935 | convolution_param { 936 | num_output: 512 937 | pad: 1 938 | kernel_size: 3 939 | stride: 2 940 | weight_filler { 941 | type: "msra" 942 | } 943 | bias_filler { 944 | type: "constant" 945 | value: 0.0 946 | } 947 | } 948 | } 949 | layer { 950 | name: "conv14_2/relu" 951 | type: "ReLU" 952 | bottom: "conv14_2" 953 | top: "conv14_2" 954 | } 955 | layer { 956 | name: "conv15_1" 957 | type: "Convolution" 958 | bottom: "conv14_2" 959 | top: "conv15_1" 960 | param { 961 | lr_mult: 1.0 962 | decay_mult: 1.0 963 | } 964 | param { 965 | lr_mult: 2.0 966 | decay_mult: 0.0 967 | } 968 | convolution_param { 969 | num_output: 128 970 | kernel_size: 1 971 | weight_filler { 972 | type: "msra" 973 | } 974 | bias_filler { 975 | type: "constant" 976 | value: 0.0 977 | } 978 | } 979 | } 980 | layer { 981 | name: "conv15_1/relu" 982 | type: "ReLU" 983 | bottom: "conv15_1" 984 | top: "conv15_1" 985 | } 986 | layer { 987 | name: "conv15_2" 988 | type: "Convolution" 989 | bottom: "conv15_1" 990 | top: "conv15_2" 991 | param { 992 | lr_mult: 1.0 993 | decay_mult: 1.0 994 | } 995 | param { 996 | lr_mult: 2.0 997 | decay_mult: 0.0 998 | } 999 | convolution_param { 1000 | num_output: 256 1001 | pad: 1 1002 | kernel_size: 3 1003 | stride: 2 1004 | weight_filler { 1005 | type: "msra" 1006 | } 1007 | bias_filler { 1008 | type: "constant" 1009 | value: 0.0 1010 | } 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv15_2/relu" 1015 | type: "ReLU" 1016 | bottom: "conv15_2" 1017 | top: "conv15_2" 1018 | } 1019 | layer { 1020 | name: "conv16_1" 1021 | type: "Convolution" 1022 | bottom: "conv15_2" 1023 | top: "conv16_1" 1024 | param { 1025 | lr_mult: 1.0 1026 | decay_mult: 1.0 1027 | } 1028 | param { 1029 | lr_mult: 2.0 1030 | decay_mult: 0.0 1031 | } 1032 | 
convolution_param { 1033 | num_output: 128 1034 | kernel_size: 1 1035 | weight_filler { 1036 | type: "msra" 1037 | } 1038 | bias_filler { 1039 | type: "constant" 1040 | value: 0.0 1041 | } 1042 | } 1043 | } 1044 | layer { 1045 | name: "conv16_1/relu" 1046 | type: "ReLU" 1047 | bottom: "conv16_1" 1048 | top: "conv16_1" 1049 | } 1050 | layer { 1051 | name: "conv16_2" 1052 | type: "Convolution" 1053 | bottom: "conv16_1" 1054 | top: "conv16_2" 1055 | param { 1056 | lr_mult: 1.0 1057 | decay_mult: 1.0 1058 | } 1059 | param { 1060 | lr_mult: 2.0 1061 | decay_mult: 0.0 1062 | } 1063 | convolution_param { 1064 | num_output: 256 1065 | pad: 1 1066 | kernel_size: 3 1067 | stride: 2 1068 | weight_filler { 1069 | type: "msra" 1070 | } 1071 | bias_filler { 1072 | type: "constant" 1073 | value: 0.0 1074 | } 1075 | } 1076 | } 1077 | layer { 1078 | name: "conv16_2/relu" 1079 | type: "ReLU" 1080 | bottom: "conv16_2" 1081 | top: "conv16_2" 1082 | } 1083 | layer { 1084 | name: "conv17_1" 1085 | type: "Convolution" 1086 | bottom: "conv16_2" 1087 | top: "conv17_1" 1088 | param { 1089 | lr_mult: 1.0 1090 | decay_mult: 1.0 1091 | } 1092 | param { 1093 | lr_mult: 2.0 1094 | decay_mult: 0.0 1095 | } 1096 | convolution_param { 1097 | num_output: 64 1098 | kernel_size: 1 1099 | weight_filler { 1100 | type: "msra" 1101 | } 1102 | bias_filler { 1103 | type: "constant" 1104 | value: 0.0 1105 | } 1106 | } 1107 | } 1108 | layer { 1109 | name: "conv17_1/relu" 1110 | type: "ReLU" 1111 | bottom: "conv17_1" 1112 | top: "conv17_1" 1113 | } 1114 | layer { 1115 | name: "conv17_2" 1116 | type: "Convolution" 1117 | bottom: "conv17_1" 1118 | top: "conv17_2" 1119 | param { 1120 | lr_mult: 1.0 1121 | decay_mult: 1.0 1122 | } 1123 | param { 1124 | lr_mult: 2.0 1125 | decay_mult: 0.0 1126 | } 1127 | convolution_param { 1128 | num_output: 128 1129 | pad: 1 1130 | kernel_size: 3 1131 | stride: 2 1132 | weight_filler { 1133 | type: "msra" 1134 | } 1135 | bias_filler { 1136 | type: "constant" 1137 | value: 0.0 
1138 | } 1139 | } 1140 | } 1141 | layer { 1142 | name: "conv17_2/relu" 1143 | type: "ReLU" 1144 | bottom: "conv17_2" 1145 | top: "conv17_2" 1146 | } 1147 | layer { 1148 | name: "conv11_mbox_loc" 1149 | type: "Convolution" 1150 | bottom: "conv11" 1151 | top: "conv11_mbox_loc" 1152 | param { 1153 | lr_mult: 1.0 1154 | decay_mult: 1.0 1155 | } 1156 | param { 1157 | lr_mult: 2.0 1158 | decay_mult: 0.0 1159 | } 1160 | convolution_param { 1161 | num_output: 12 1162 | kernel_size: 1 1163 | weight_filler { 1164 | type: "msra" 1165 | } 1166 | bias_filler { 1167 | type: "constant" 1168 | value: 0.0 1169 | } 1170 | } 1171 | } 1172 | layer { 1173 | name: "conv11_mbox_loc_perm" 1174 | type: "Permute" 1175 | bottom: "conv11_mbox_loc" 1176 | top: "conv11_mbox_loc_perm" 1177 | permute_param { 1178 | order: 0 1179 | order: 2 1180 | order: 3 1181 | order: 1 1182 | } 1183 | } 1184 | layer { 1185 | name: "conv11_mbox_loc_flat" 1186 | type: "Flatten" 1187 | bottom: "conv11_mbox_loc_perm" 1188 | top: "conv11_mbox_loc_flat" 1189 | flatten_param { 1190 | axis: 1 1191 | } 1192 | } 1193 | layer { 1194 | name: "conv11_mbox_conf" 1195 | type: "Convolution" 1196 | bottom: "conv11" 1197 | top: "conv11_mbox_conf" 1198 | param { 1199 | lr_mult: 1.0 1200 | decay_mult: 1.0 1201 | } 1202 | param { 1203 | lr_mult: 2.0 1204 | decay_mult: 0.0 1205 | } 1206 | convolution_param { 1207 | num_output: 18 1208 | kernel_size: 1 1209 | weight_filler { 1210 | type: "msra" 1211 | } 1212 | bias_filler { 1213 | type: "constant" 1214 | value: 0.0 1215 | } 1216 | } 1217 | } 1218 | layer { 1219 | name: "conv11_mbox_conf_perm" 1220 | type: "Permute" 1221 | bottom: "conv11_mbox_conf" 1222 | top: "conv11_mbox_conf_perm" 1223 | permute_param { 1224 | order: 0 1225 | order: 2 1226 | order: 3 1227 | order: 1 1228 | } 1229 | } 1230 | layer { 1231 | name: "conv11_mbox_conf_flat" 1232 | type: "Flatten" 1233 | bottom: "conv11_mbox_conf_perm" 1234 | top: "conv11_mbox_conf_flat" 1235 | flatten_param { 1236 | axis: 1 1237 | } 
1238 | } 1239 | layer { 1240 | name: "conv11_mbox_priorbox" 1241 | type: "PriorBox" 1242 | bottom: "conv11" 1243 | bottom: "data" 1244 | top: "conv11_mbox_priorbox" 1245 | prior_box_param { 1246 | min_size: 60.0 1247 | aspect_ratio: 2.0 1248 | flip: true 1249 | clip: false 1250 | variance: 0.1 1251 | variance: 0.1 1252 | variance: 0.2 1253 | variance: 0.2 1254 | offset: 0.5 1255 | } 1256 | } 1257 | layer { 1258 | name: "conv13_mbox_loc" 1259 | type: "Convolution" 1260 | bottom: "conv13" 1261 | top: "conv13_mbox_loc" 1262 | param { 1263 | lr_mult: 1.0 1264 | decay_mult: 1.0 1265 | } 1266 | param { 1267 | lr_mult: 2.0 1268 | decay_mult: 0.0 1269 | } 1270 | convolution_param { 1271 | num_output: 24 1272 | kernel_size: 1 1273 | weight_filler { 1274 | type: "msra" 1275 | } 1276 | bias_filler { 1277 | type: "constant" 1278 | value: 0.0 1279 | } 1280 | } 1281 | } 1282 | layer { 1283 | name: "conv13_mbox_loc_perm" 1284 | type: "Permute" 1285 | bottom: "conv13_mbox_loc" 1286 | top: "conv13_mbox_loc_perm" 1287 | permute_param { 1288 | order: 0 1289 | order: 2 1290 | order: 3 1291 | order: 1 1292 | } 1293 | } 1294 | layer { 1295 | name: "conv13_mbox_loc_flat" 1296 | type: "Flatten" 1297 | bottom: "conv13_mbox_loc_perm" 1298 | top: "conv13_mbox_loc_flat" 1299 | flatten_param { 1300 | axis: 1 1301 | } 1302 | } 1303 | layer { 1304 | name: "conv13_mbox_conf" 1305 | type: "Convolution" 1306 | bottom: "conv13" 1307 | top: "conv13_mbox_conf" 1308 | param { 1309 | lr_mult: 1.0 1310 | decay_mult: 1.0 1311 | } 1312 | param { 1313 | lr_mult: 2.0 1314 | decay_mult: 0.0 1315 | } 1316 | convolution_param { 1317 | num_output: 36 1318 | kernel_size: 1 1319 | weight_filler { 1320 | type: "msra" 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.0 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "conv13_mbox_conf_perm" 1330 | type: "Permute" 1331 | bottom: "conv13_mbox_conf" 1332 | top: "conv13_mbox_conf_perm" 1333 | permute_param { 1334 | order: 0 1335 | order: 2 1336 
| order: 3 1337 | order: 1 1338 | } 1339 | } 1340 | layer { 1341 | name: "conv13_mbox_conf_flat" 1342 | type: "Flatten" 1343 | bottom: "conv13_mbox_conf_perm" 1344 | top: "conv13_mbox_conf_flat" 1345 | flatten_param { 1346 | axis: 1 1347 | } 1348 | } 1349 | layer { 1350 | name: "conv13_mbox_priorbox" 1351 | type: "PriorBox" 1352 | bottom: "conv13" 1353 | bottom: "data" 1354 | top: "conv13_mbox_priorbox" 1355 | prior_box_param { 1356 | min_size: 105.0 1357 | max_size: 150.0 1358 | aspect_ratio: 2.0 1359 | aspect_ratio: 3.0 1360 | flip: true 1361 | clip: false 1362 | variance: 0.1 1363 | variance: 0.1 1364 | variance: 0.2 1365 | variance: 0.2 1366 | offset: 0.5 1367 | } 1368 | } 1369 | layer { 1370 | name: "conv14_2_mbox_loc" 1371 | type: "Convolution" 1372 | bottom: "conv14_2" 1373 | top: "conv14_2_mbox_loc" 1374 | param { 1375 | lr_mult: 1.0 1376 | decay_mult: 1.0 1377 | } 1378 | param { 1379 | lr_mult: 2.0 1380 | decay_mult: 0.0 1381 | } 1382 | convolution_param { 1383 | num_output: 24 1384 | kernel_size: 1 1385 | weight_filler { 1386 | type: "msra" 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.0 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "conv14_2_mbox_loc_perm" 1396 | type: "Permute" 1397 | bottom: "conv14_2_mbox_loc" 1398 | top: "conv14_2_mbox_loc_perm" 1399 | permute_param { 1400 | order: 0 1401 | order: 2 1402 | order: 3 1403 | order: 1 1404 | } 1405 | } 1406 | layer { 1407 | name: "conv14_2_mbox_loc_flat" 1408 | type: "Flatten" 1409 | bottom: "conv14_2_mbox_loc_perm" 1410 | top: "conv14_2_mbox_loc_flat" 1411 | flatten_param { 1412 | axis: 1 1413 | } 1414 | } 1415 | layer { 1416 | name: "conv14_2_mbox_conf" 1417 | type: "Convolution" 1418 | bottom: "conv14_2" 1419 | top: "conv14_2_mbox_conf" 1420 | param { 1421 | lr_mult: 1.0 1422 | decay_mult: 1.0 1423 | } 1424 | param { 1425 | lr_mult: 2.0 1426 | decay_mult: 0.0 1427 | } 1428 | convolution_param { 1429 | num_output: 36 1430 | kernel_size: 1 1431 | weight_filler { 1432 | 
type: "msra" 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | value: 0.0 1437 | } 1438 | } 1439 | } 1440 | layer { 1441 | name: "conv14_2_mbox_conf_perm" 1442 | type: "Permute" 1443 | bottom: "conv14_2_mbox_conf" 1444 | top: "conv14_2_mbox_conf_perm" 1445 | permute_param { 1446 | order: 0 1447 | order: 2 1448 | order: 3 1449 | order: 1 1450 | } 1451 | } 1452 | layer { 1453 | name: "conv14_2_mbox_conf_flat" 1454 | type: "Flatten" 1455 | bottom: "conv14_2_mbox_conf_perm" 1456 | top: "conv14_2_mbox_conf_flat" 1457 | flatten_param { 1458 | axis: 1 1459 | } 1460 | } 1461 | layer { 1462 | name: "conv14_2_mbox_priorbox" 1463 | type: "PriorBox" 1464 | bottom: "conv14_2" 1465 | bottom: "data" 1466 | top: "conv14_2_mbox_priorbox" 1467 | prior_box_param { 1468 | min_size: 150.0 1469 | max_size: 195.0 1470 | aspect_ratio: 2.0 1471 | aspect_ratio: 3.0 1472 | flip: true 1473 | clip: false 1474 | variance: 0.1 1475 | variance: 0.1 1476 | variance: 0.2 1477 | variance: 0.2 1478 | offset: 0.5 1479 | } 1480 | } 1481 | layer { 1482 | name: "conv15_2_mbox_loc" 1483 | type: "Convolution" 1484 | bottom: "conv15_2" 1485 | top: "conv15_2_mbox_loc" 1486 | param { 1487 | lr_mult: 1.0 1488 | decay_mult: 1.0 1489 | } 1490 | param { 1491 | lr_mult: 2.0 1492 | decay_mult: 0.0 1493 | } 1494 | convolution_param { 1495 | num_output: 24 1496 | kernel_size: 1 1497 | weight_filler { 1498 | type: "msra" 1499 | } 1500 | bias_filler { 1501 | type: "constant" 1502 | value: 0.0 1503 | } 1504 | } 1505 | } 1506 | layer { 1507 | name: "conv15_2_mbox_loc_perm" 1508 | type: "Permute" 1509 | bottom: "conv15_2_mbox_loc" 1510 | top: "conv15_2_mbox_loc_perm" 1511 | permute_param { 1512 | order: 0 1513 | order: 2 1514 | order: 3 1515 | order: 1 1516 | } 1517 | } 1518 | layer { 1519 | name: "conv15_2_mbox_loc_flat" 1520 | type: "Flatten" 1521 | bottom: "conv15_2_mbox_loc_perm" 1522 | top: "conv15_2_mbox_loc_flat" 1523 | flatten_param { 1524 | axis: 1 1525 | } 1526 | } 1527 | layer { 1528 | name: 
"conv15_2_mbox_conf" 1529 | type: "Convolution" 1530 | bottom: "conv15_2" 1531 | top: "conv15_2_mbox_conf" 1532 | param { 1533 | lr_mult: 1.0 1534 | decay_mult: 1.0 1535 | } 1536 | param { 1537 | lr_mult: 2.0 1538 | decay_mult: 0.0 1539 | } 1540 | convolution_param { 1541 | num_output: 36 1542 | kernel_size: 1 1543 | weight_filler { 1544 | type: "msra" 1545 | } 1546 | bias_filler { 1547 | type: "constant" 1548 | value: 0.0 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "conv15_2_mbox_conf_perm" 1554 | type: "Permute" 1555 | bottom: "conv15_2_mbox_conf" 1556 | top: "conv15_2_mbox_conf_perm" 1557 | permute_param { 1558 | order: 0 1559 | order: 2 1560 | order: 3 1561 | order: 1 1562 | } 1563 | } 1564 | layer { 1565 | name: "conv15_2_mbox_conf_flat" 1566 | type: "Flatten" 1567 | bottom: "conv15_2_mbox_conf_perm" 1568 | top: "conv15_2_mbox_conf_flat" 1569 | flatten_param { 1570 | axis: 1 1571 | } 1572 | } 1573 | layer { 1574 | name: "conv15_2_mbox_priorbox" 1575 | type: "PriorBox" 1576 | bottom: "conv15_2" 1577 | bottom: "data" 1578 | top: "conv15_2_mbox_priorbox" 1579 | prior_box_param { 1580 | min_size: 195.0 1581 | max_size: 240.0 1582 | aspect_ratio: 2.0 1583 | aspect_ratio: 3.0 1584 | flip: true 1585 | clip: false 1586 | variance: 0.1 1587 | variance: 0.1 1588 | variance: 0.2 1589 | variance: 0.2 1590 | offset: 0.5 1591 | } 1592 | } 1593 | layer { 1594 | name: "conv16_2_mbox_loc" 1595 | type: "Convolution" 1596 | bottom: "conv16_2" 1597 | top: "conv16_2_mbox_loc" 1598 | param { 1599 | lr_mult: 1.0 1600 | decay_mult: 1.0 1601 | } 1602 | param { 1603 | lr_mult: 2.0 1604 | decay_mult: 0.0 1605 | } 1606 | convolution_param { 1607 | num_output: 24 1608 | kernel_size: 1 1609 | weight_filler { 1610 | type: "msra" 1611 | } 1612 | bias_filler { 1613 | type: "constant" 1614 | value: 0.0 1615 | } 1616 | } 1617 | } 1618 | layer { 1619 | name: "conv16_2_mbox_loc_perm" 1620 | type: "Permute" 1621 | bottom: "conv16_2_mbox_loc" 1622 | top: "conv16_2_mbox_loc_perm" 1623 | 
permute_param { 1624 | order: 0 1625 | order: 2 1626 | order: 3 1627 | order: 1 1628 | } 1629 | } 1630 | layer { 1631 | name: "conv16_2_mbox_loc_flat" 1632 | type: "Flatten" 1633 | bottom: "conv16_2_mbox_loc_perm" 1634 | top: "conv16_2_mbox_loc_flat" 1635 | flatten_param { 1636 | axis: 1 1637 | } 1638 | } 1639 | layer { 1640 | name: "conv16_2_mbox_conf" 1641 | type: "Convolution" 1642 | bottom: "conv16_2" 1643 | top: "conv16_2_mbox_conf" 1644 | param { 1645 | lr_mult: 1.0 1646 | decay_mult: 1.0 1647 | } 1648 | param { 1649 | lr_mult: 2.0 1650 | decay_mult: 0.0 1651 | } 1652 | convolution_param { 1653 | num_output: 36 1654 | kernel_size: 1 1655 | weight_filler { 1656 | type: "msra" 1657 | } 1658 | bias_filler { 1659 | type: "constant" 1660 | value: 0.0 1661 | } 1662 | } 1663 | } 1664 | layer { 1665 | name: "conv16_2_mbox_conf_perm" 1666 | type: "Permute" 1667 | bottom: "conv16_2_mbox_conf" 1668 | top: "conv16_2_mbox_conf_perm" 1669 | permute_param { 1670 | order: 0 1671 | order: 2 1672 | order: 3 1673 | order: 1 1674 | } 1675 | } 1676 | layer { 1677 | name: "conv16_2_mbox_conf_flat" 1678 | type: "Flatten" 1679 | bottom: "conv16_2_mbox_conf_perm" 1680 | top: "conv16_2_mbox_conf_flat" 1681 | flatten_param { 1682 | axis: 1 1683 | } 1684 | } 1685 | layer { 1686 | name: "conv16_2_mbox_priorbox" 1687 | type: "PriorBox" 1688 | bottom: "conv16_2" 1689 | bottom: "data" 1690 | top: "conv16_2_mbox_priorbox" 1691 | prior_box_param { 1692 | min_size: 240.0 1693 | max_size: 285.0 1694 | aspect_ratio: 2.0 1695 | aspect_ratio: 3.0 1696 | flip: true 1697 | clip: false 1698 | variance: 0.1 1699 | variance: 0.1 1700 | variance: 0.2 1701 | variance: 0.2 1702 | offset: 0.5 1703 | } 1704 | } 1705 | layer { 1706 | name: "conv17_2_mbox_loc" 1707 | type: "Convolution" 1708 | bottom: "conv17_2" 1709 | top: "conv17_2_mbox_loc" 1710 | param { 1711 | lr_mult: 1.0 1712 | decay_mult: 1.0 1713 | } 1714 | param { 1715 | lr_mult: 2.0 1716 | decay_mult: 0.0 1717 | } 1718 | convolution_param { 1719 | 
num_output: 24 1720 | kernel_size: 1 1721 | weight_filler { 1722 | type: "msra" 1723 | } 1724 | bias_filler { 1725 | type: "constant" 1726 | value: 0.0 1727 | } 1728 | } 1729 | } 1730 | layer { 1731 | name: "conv17_2_mbox_loc_perm" 1732 | type: "Permute" 1733 | bottom: "conv17_2_mbox_loc" 1734 | top: "conv17_2_mbox_loc_perm" 1735 | permute_param { 1736 | order: 0 1737 | order: 2 1738 | order: 3 1739 | order: 1 1740 | } 1741 | } 1742 | layer { 1743 | name: "conv17_2_mbox_loc_flat" 1744 | type: "Flatten" 1745 | bottom: "conv17_2_mbox_loc_perm" 1746 | top: "conv17_2_mbox_loc_flat" 1747 | flatten_param { 1748 | axis: 1 1749 | } 1750 | } 1751 | layer { 1752 | name: "conv17_2_mbox_conf" 1753 | type: "Convolution" 1754 | bottom: "conv17_2" 1755 | top: "conv17_2_mbox_conf" 1756 | param { 1757 | lr_mult: 1.0 1758 | decay_mult: 1.0 1759 | } 1760 | param { 1761 | lr_mult: 2.0 1762 | decay_mult: 0.0 1763 | } 1764 | convolution_param { 1765 | num_output: 36 1766 | kernel_size: 1 1767 | weight_filler { 1768 | type: "msra" 1769 | } 1770 | bias_filler { 1771 | type: "constant" 1772 | value: 0.0 1773 | } 1774 | } 1775 | } 1776 | layer { 1777 | name: "conv17_2_mbox_conf_perm" 1778 | type: "Permute" 1779 | bottom: "conv17_2_mbox_conf" 1780 | top: "conv17_2_mbox_conf_perm" 1781 | permute_param { 1782 | order: 0 1783 | order: 2 1784 | order: 3 1785 | order: 1 1786 | } 1787 | } 1788 | layer { 1789 | name: "conv17_2_mbox_conf_flat" 1790 | type: "Flatten" 1791 | bottom: "conv17_2_mbox_conf_perm" 1792 | top: "conv17_2_mbox_conf_flat" 1793 | flatten_param { 1794 | axis: 1 1795 | } 1796 | } 1797 | layer { 1798 | name: "conv17_2_mbox_priorbox" 1799 | type: "PriorBox" 1800 | bottom: "conv17_2" 1801 | bottom: "data" 1802 | top: "conv17_2_mbox_priorbox" 1803 | prior_box_param { 1804 | min_size: 285.0 1805 | max_size: 300.0 1806 | aspect_ratio: 2.0 1807 | aspect_ratio: 3.0 1808 | flip: true 1809 | clip: false 1810 | variance: 0.1 1811 | variance: 0.1 1812 | variance: 0.2 1813 | variance: 0.2 1814 
| offset: 0.5 1815 | } 1816 | } 1817 | layer { 1818 | name: "mbox_loc" 1819 | type: "Concat" 1820 | bottom: "conv11_mbox_loc_flat" 1821 | bottom: "conv13_mbox_loc_flat" 1822 | bottom: "conv14_2_mbox_loc_flat" 1823 | bottom: "conv15_2_mbox_loc_flat" 1824 | bottom: "conv16_2_mbox_loc_flat" 1825 | bottom: "conv17_2_mbox_loc_flat" 1826 | top: "mbox_loc" 1827 | concat_param { 1828 | axis: 1 1829 | } 1830 | } 1831 | layer { 1832 | name: "mbox_conf" 1833 | type: "Concat" 1834 | bottom: "conv11_mbox_conf_flat" 1835 | bottom: "conv13_mbox_conf_flat" 1836 | bottom: "conv14_2_mbox_conf_flat" 1837 | bottom: "conv15_2_mbox_conf_flat" 1838 | bottom: "conv16_2_mbox_conf_flat" 1839 | bottom: "conv17_2_mbox_conf_flat" 1840 | top: "mbox_conf" 1841 | concat_param { 1842 | axis: 1 1843 | } 1844 | } 1845 | layer { 1846 | name: "mbox_priorbox" 1847 | type: "Concat" 1848 | bottom: "conv11_mbox_priorbox" 1849 | bottom: "conv13_mbox_priorbox" 1850 | bottom: "conv14_2_mbox_priorbox" 1851 | bottom: "conv15_2_mbox_priorbox" 1852 | bottom: "conv16_2_mbox_priorbox" 1853 | bottom: "conv17_2_mbox_priorbox" 1854 | top: "mbox_priorbox" 1855 | concat_param { 1856 | axis: 2 1857 | } 1858 | } 1859 | layer { 1860 | name: "mbox_conf_reshape" 1861 | type: "Reshape" 1862 | bottom: "mbox_conf" 1863 | top: "mbox_conf_reshape" 1864 | reshape_param { 1865 | shape { 1866 | dim: 0 1867 | dim: -1 1868 | dim: 6 1869 | } 1870 | } 1871 | } 1872 | layer { 1873 | name: "mbox_conf_softmax" 1874 | type: "Softmax" 1875 | bottom: "mbox_conf_reshape" 1876 | top: "mbox_conf_softmax" 1877 | softmax_param { 1878 | axis: 2 1879 | } 1880 | } 1881 | layer { 1882 | name: "mbox_conf_flatten" 1883 | type: "Flatten" 1884 | bottom: "mbox_conf_softmax" 1885 | top: "mbox_conf_flatten" 1886 | flatten_param { 1887 | axis: 1 1888 | } 1889 | } 1890 | layer { 1891 | name: "detection_out" 1892 | type: "DetectionOutput" 1893 | bottom: "mbox_loc" 1894 | bottom: "mbox_conf_flatten" 1895 | bottom: "mbox_priorbox" 1896 | top: "detection_out" 
1897 | include { 1898 | phase: TEST 1899 | } 1900 | detection_output_param { 1901 | num_classes: 6 1902 | share_location: true 1903 | background_label_id: 0 1904 | nms_param { 1905 | nms_threshold: 0.45 1906 | top_k: 100 1907 | } 1908 | code_type: CENTER_SIZE 1909 | keep_top_k: 100 1910 | confidence_threshold: 0.25 1911 | } 1912 | } 1913 | -------------------------------------------------------------------------------- /SSD/MobileNet/MobileNetSSD_deploy2.prototxt: -------------------------------------------------------------------------------- 1 | name: "MobileNet-SSD" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 300 7 | dim: 300 8 | } 9 | layer { 10 | name: "conv0" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv0" 14 | param { 15 | lr_mult: 1.0 16 | decay_mult: 1.0 17 | } 18 | param { 19 | lr_mult: 2.0 20 | decay_mult: 0.0 21 | } 22 | convolution_param { 23 | num_output: 32 24 | pad: 1 25 | kernel_size: 3 26 | stride: 2 27 | weight_filler { 28 | type: "msra" 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.0 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv0/relu" 38 | type: "ReLU" 39 | bottom: "conv0" 40 | top: "conv0" 41 | } 42 | layer { 43 | name: "conv1/dw" 44 | type: "Convolution" 45 | bottom: "conv0" 46 | top: "conv1/dw" 47 | param { 48 | lr_mult: 1.0 49 | decay_mult: 1.0 50 | } 51 | param { 52 | lr_mult: 2.0 53 | decay_mult: 0.0 54 | } 55 | convolution_param { 56 | num_output: 32 57 | pad: 1 58 | kernel_size: 3 59 | group: 32 60 | engine: CAFFE 61 | weight_filler { 62 | type: "msra" 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0.0 67 | } 68 | } 69 | } 70 | layer { 71 | name: "conv1/dw/relu" 72 | type: "ReLU" 73 | bottom: "conv1/dw" 74 | top: "conv1/dw" 75 | } 76 | layer { 77 | name: "conv1" 78 | type: "Convolution" 79 | bottom: "conv1/dw" 80 | top: "conv1" 81 | param { 82 | lr_mult: 1.0 83 | decay_mult: 1.0 84 | } 85 | param { 86 | lr_mult: 2.0 87 | decay_mult: 0.0 88 | } 89 | convolution_param 
{ 90 | num_output: 64 91 | kernel_size: 1 92 | weight_filler { 93 | type: "msra" 94 | } 95 | bias_filler { 96 | type: "constant" 97 | value: 0.0 98 | } 99 | } 100 | } 101 | layer { 102 | name: "conv1/relu" 103 | type: "ReLU" 104 | bottom: "conv1" 105 | top: "conv1" 106 | } 107 | layer { 108 | name: "conv2/dw" 109 | type: "Convolution" 110 | bottom: "conv1" 111 | top: "conv2/dw" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 0.0 119 | } 120 | convolution_param { 121 | num_output: 64 122 | pad: 1 123 | kernel_size: 3 124 | stride: 2 125 | group: 64 126 | engine: CAFFE 127 | weight_filler { 128 | type: "msra" 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 0.0 133 | } 134 | } 135 | } 136 | layer { 137 | name: "conv2/dw/relu" 138 | type: "ReLU" 139 | bottom: "conv2/dw" 140 | top: "conv2/dw" 141 | } 142 | layer { 143 | name: "conv2" 144 | type: "Convolution" 145 | bottom: "conv2/dw" 146 | top: "conv2" 147 | param { 148 | lr_mult: 1.0 149 | decay_mult: 1.0 150 | } 151 | param { 152 | lr_mult: 2.0 153 | decay_mult: 0.0 154 | } 155 | convolution_param { 156 | num_output: 128 157 | kernel_size: 1 158 | weight_filler { 159 | type: "msra" 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0.0 164 | } 165 | } 166 | } 167 | layer { 168 | name: "conv2/relu" 169 | type: "ReLU" 170 | bottom: "conv2" 171 | top: "conv2" 172 | } 173 | layer { 174 | name: "conv3/dw" 175 | type: "Convolution" 176 | bottom: "conv2" 177 | top: "conv3/dw" 178 | param { 179 | lr_mult: 1.0 180 | decay_mult: 1.0 181 | } 182 | param { 183 | lr_mult: 2.0 184 | decay_mult: 0.0 185 | } 186 | convolution_param { 187 | num_output: 128 188 | pad: 1 189 | kernel_size: 3 190 | group: 128 191 | engine: CAFFE 192 | weight_filler { 193 | type: "msra" 194 | } 195 | bias_filler { 196 | type: "constant" 197 | value: 0.0 198 | } 199 | } 200 | } 201 | layer { 202 | name: "conv3/dw/relu" 203 | type: "ReLU" 204 | bottom: "conv3/dw" 
205 | top: "conv3/dw" 206 | } 207 | layer { 208 | name: "conv3" 209 | type: "Convolution" 210 | bottom: "conv3/dw" 211 | top: "conv3" 212 | param { 213 | lr_mult: 1.0 214 | decay_mult: 1.0 215 | } 216 | param { 217 | lr_mult: 2.0 218 | decay_mult: 0.0 219 | } 220 | convolution_param { 221 | num_output: 128 222 | kernel_size: 1 223 | weight_filler { 224 | type: "msra" 225 | } 226 | bias_filler { 227 | type: "constant" 228 | value: 0.0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "conv3/relu" 234 | type: "ReLU" 235 | bottom: "conv3" 236 | top: "conv3" 237 | } 238 | layer { 239 | name: "conv4/dw" 240 | type: "Convolution" 241 | bottom: "conv3" 242 | top: "conv4/dw" 243 | param { 244 | lr_mult: 1.0 245 | decay_mult: 1.0 246 | } 247 | param { 248 | lr_mult: 2.0 249 | decay_mult: 0.0 250 | } 251 | convolution_param { 252 | num_output: 128 253 | pad: 1 254 | kernel_size: 3 255 | stride: 2 256 | group: 128 257 | engine: CAFFE 258 | weight_filler { 259 | type: "msra" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0.0 264 | } 265 | } 266 | } 267 | layer { 268 | name: "conv4/dw/relu" 269 | type: "ReLU" 270 | bottom: "conv4/dw" 271 | top: "conv4/dw" 272 | } 273 | layer { 274 | name: "conv4" 275 | type: "Convolution" 276 | bottom: "conv4/dw" 277 | top: "conv4" 278 | param { 279 | lr_mult: 1.0 280 | decay_mult: 1.0 281 | } 282 | param { 283 | lr_mult: 2.0 284 | decay_mult: 0.0 285 | } 286 | convolution_param { 287 | num_output: 256 288 | kernel_size: 1 289 | weight_filler { 290 | type: "msra" 291 | } 292 | bias_filler { 293 | type: "constant" 294 | value: 0.0 295 | } 296 | } 297 | } 298 | layer { 299 | name: "conv4/relu" 300 | type: "ReLU" 301 | bottom: "conv4" 302 | top: "conv4" 303 | } 304 | layer { 305 | name: "conv5/dw" 306 | type: "Convolution" 307 | bottom: "conv4" 308 | top: "conv5/dw" 309 | param { 310 | lr_mult: 1.0 311 | decay_mult: 1.0 312 | } 313 | param { 314 | lr_mult: 2.0 315 | decay_mult: 0.0 316 | } 317 | convolution_param { 318 | 
num_output: 256 319 | pad: 1 320 | kernel_size: 3 321 | group: 256 322 | engine: CAFFE 323 | weight_filler { 324 | type: "msra" 325 | } 326 | bias_filler { 327 | type: "constant" 328 | value: 0.0 329 | } 330 | } 331 | } 332 | layer { 333 | name: "conv5/dw/relu" 334 | type: "ReLU" 335 | bottom: "conv5/dw" 336 | top: "conv5/dw" 337 | } 338 | layer { 339 | name: "conv5" 340 | type: "Convolution" 341 | bottom: "conv5/dw" 342 | top: "conv5" 343 | param { 344 | lr_mult: 1.0 345 | decay_mult: 1.0 346 | } 347 | param { 348 | lr_mult: 2.0 349 | decay_mult: 0.0 350 | } 351 | convolution_param { 352 | num_output: 256 353 | kernel_size: 1 354 | weight_filler { 355 | type: "msra" 356 | } 357 | bias_filler { 358 | type: "constant" 359 | value: 0.0 360 | } 361 | } 362 | } 363 | layer { 364 | name: "conv5/relu" 365 | type: "ReLU" 366 | bottom: "conv5" 367 | top: "conv5" 368 | } 369 | layer { 370 | name: "conv6/dw" 371 | type: "Convolution" 372 | bottom: "conv5" 373 | top: "conv6/dw" 374 | param { 375 | lr_mult: 1.0 376 | decay_mult: 1.0 377 | } 378 | param { 379 | lr_mult: 2.0 380 | decay_mult: 0.0 381 | } 382 | convolution_param { 383 | num_output: 256 384 | pad: 1 385 | kernel_size: 3 386 | stride: 2 387 | group: 256 388 | engine: CAFFE 389 | weight_filler { 390 | type: "msra" 391 | } 392 | bias_filler { 393 | type: "constant" 394 | value: 0.0 395 | } 396 | } 397 | } 398 | layer { 399 | name: "conv6/dw/relu" 400 | type: "ReLU" 401 | bottom: "conv6/dw" 402 | top: "conv6/dw" 403 | } 404 | layer { 405 | name: "conv6" 406 | type: "Convolution" 407 | bottom: "conv6/dw" 408 | top: "conv6" 409 | param { 410 | lr_mult: 1.0 411 | decay_mult: 1.0 412 | } 413 | param { 414 | lr_mult: 2.0 415 | decay_mult: 0.0 416 | } 417 | convolution_param { 418 | num_output: 512 419 | kernel_size: 1 420 | weight_filler { 421 | type: "msra" 422 | } 423 | bias_filler { 424 | type: "constant" 425 | value: 0.0 426 | } 427 | } 428 | } 429 | layer { 430 | name: "conv6/relu" 431 | type: "ReLU" 432 | bottom: 
"conv6" 433 | top: "conv6" 434 | } 435 | layer { 436 | name: "conv7/dw" 437 | type: "Convolution" 438 | bottom: "conv6" 439 | top: "conv7/dw" 440 | param { 441 | lr_mult: 1.0 442 | decay_mult: 1.0 443 | } 444 | param { 445 | lr_mult: 2.0 446 | decay_mult: 0.0 447 | } 448 | convolution_param { 449 | num_output: 512 450 | pad: 1 451 | kernel_size: 3 452 | group: 512 453 | engine: CAFFE 454 | weight_filler { 455 | type: "msra" 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0.0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "conv7/dw/relu" 465 | type: "ReLU" 466 | bottom: "conv7/dw" 467 | top: "conv7/dw" 468 | } 469 | layer { 470 | name: "conv7" 471 | type: "Convolution" 472 | bottom: "conv7/dw" 473 | top: "conv7" 474 | param { 475 | lr_mult: 1.0 476 | decay_mult: 1.0 477 | } 478 | param { 479 | lr_mult: 2.0 480 | decay_mult: 0.0 481 | } 482 | convolution_param { 483 | num_output: 512 484 | kernel_size: 1 485 | weight_filler { 486 | type: "msra" 487 | } 488 | bias_filler { 489 | type: "constant" 490 | value: 0.0 491 | } 492 | } 493 | } 494 | layer { 495 | name: "conv7/relu" 496 | type: "ReLU" 497 | bottom: "conv7" 498 | top: "conv7" 499 | } 500 | layer { 501 | name: "conv8/dw" 502 | type: "Convolution" 503 | bottom: "conv7" 504 | top: "conv8/dw" 505 | param { 506 | lr_mult: 1.0 507 | decay_mult: 1.0 508 | } 509 | param { 510 | lr_mult: 2.0 511 | decay_mult: 0.0 512 | } 513 | convolution_param { 514 | num_output: 512 515 | pad: 1 516 | kernel_size: 3 517 | group: 512 518 | engine: CAFFE 519 | weight_filler { 520 | type: "msra" 521 | } 522 | bias_filler { 523 | type: "constant" 524 | value: 0.0 525 | } 526 | } 527 | } 528 | layer { 529 | name: "conv8/dw/relu" 530 | type: "ReLU" 531 | bottom: "conv8/dw" 532 | top: "conv8/dw" 533 | } 534 | layer { 535 | name: "conv8" 536 | type: "Convolution" 537 | bottom: "conv8/dw" 538 | top: "conv8" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | param { 544 | lr_mult: 2.0 545 | decay_mult: 0.0 
546 | } 547 | convolution_param { 548 | num_output: 512 549 | kernel_size: 1 550 | weight_filler { 551 | type: "msra" 552 | } 553 | bias_filler { 554 | type: "constant" 555 | value: 0.0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "conv8/relu" 561 | type: "ReLU" 562 | bottom: "conv8" 563 | top: "conv8" 564 | } 565 | layer { 566 | name: "conv9/dw" 567 | type: "Convolution" 568 | bottom: "conv8" 569 | top: "conv9/dw" 570 | param { 571 | lr_mult: 1.0 572 | decay_mult: 1.0 573 | } 574 | param { 575 | lr_mult: 2.0 576 | decay_mult: 0.0 577 | } 578 | convolution_param { 579 | num_output: 512 580 | pad: 1 581 | kernel_size: 3 582 | group: 512 583 | engine: CAFFE 584 | weight_filler { 585 | type: "msra" 586 | } 587 | bias_filler { 588 | type: "constant" 589 | value: 0.0 590 | } 591 | } 592 | } 593 | layer { 594 | name: "conv9/dw/relu" 595 | type: "ReLU" 596 | bottom: "conv9/dw" 597 | top: "conv9/dw" 598 | } 599 | layer { 600 | name: "conv9" 601 | type: "Convolution" 602 | bottom: "conv9/dw" 603 | top: "conv9" 604 | param { 605 | lr_mult: 1.0 606 | decay_mult: 1.0 607 | } 608 | param { 609 | lr_mult: 2.0 610 | decay_mult: 0.0 611 | } 612 | convolution_param { 613 | num_output: 512 614 | kernel_size: 1 615 | weight_filler { 616 | type: "msra" 617 | } 618 | bias_filler { 619 | type: "constant" 620 | value: 0.0 621 | } 622 | } 623 | } 624 | layer { 625 | name: "conv9/relu" 626 | type: "ReLU" 627 | bottom: "conv9" 628 | top: "conv9" 629 | } 630 | layer { 631 | name: "conv10/dw" 632 | type: "Convolution" 633 | bottom: "conv9" 634 | top: "conv10/dw" 635 | param { 636 | lr_mult: 1.0 637 | decay_mult: 1.0 638 | } 639 | param { 640 | lr_mult: 2.0 641 | decay_mult: 0.0 642 | } 643 | convolution_param { 644 | num_output: 512 645 | pad: 1 646 | kernel_size: 3 647 | group: 512 648 | engine: CAFFE 649 | weight_filler { 650 | type: "msra" 651 | } 652 | bias_filler { 653 | type: "constant" 654 | value: 0.0 655 | } 656 | } 657 | } 658 | layer { 659 | name: "conv10/dw/relu" 660 | type: 
"ReLU" 661 | bottom: "conv10/dw" 662 | top: "conv10/dw" 663 | } 664 | layer { 665 | name: "conv10" 666 | type: "Convolution" 667 | bottom: "conv10/dw" 668 | top: "conv10" 669 | param { 670 | lr_mult: 1.0 671 | decay_mult: 1.0 672 | } 673 | param { 674 | lr_mult: 2.0 675 | decay_mult: 0.0 676 | } 677 | convolution_param { 678 | num_output: 512 679 | kernel_size: 1 680 | weight_filler { 681 | type: "msra" 682 | } 683 | bias_filler { 684 | type: "constant" 685 | value: 0.0 686 | } 687 | } 688 | } 689 | layer { 690 | name: "conv10/relu" 691 | type: "ReLU" 692 | bottom: "conv10" 693 | top: "conv10" 694 | } 695 | layer { 696 | name: "conv11/dw" 697 | type: "Convolution" 698 | bottom: "conv10" 699 | top: "conv11/dw" 700 | param { 701 | lr_mult: 1.0 702 | decay_mult: 1.0 703 | } 704 | param { 705 | lr_mult: 2.0 706 | decay_mult: 0.0 707 | } 708 | convolution_param { 709 | num_output: 512 710 | pad: 1 711 | kernel_size: 3 712 | group: 512 713 | engine: CAFFE 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0.0 720 | } 721 | } 722 | } 723 | layer { 724 | name: "conv11/dw/relu" 725 | type: "ReLU" 726 | bottom: "conv11/dw" 727 | top: "conv11/dw" 728 | } 729 | layer { 730 | name: "conv11" 731 | type: "Convolution" 732 | bottom: "conv11/dw" 733 | top: "conv11" 734 | param { 735 | lr_mult: 1.0 736 | decay_mult: 1.0 737 | } 738 | param { 739 | lr_mult: 2.0 740 | decay_mult: 0.0 741 | } 742 | convolution_param { 743 | num_output: 512 744 | kernel_size: 1 745 | weight_filler { 746 | type: "msra" 747 | } 748 | bias_filler { 749 | type: "constant" 750 | value: 0.0 751 | } 752 | } 753 | } 754 | layer { 755 | name: "conv11/relu" 756 | type: "ReLU" 757 | bottom: "conv11" 758 | top: "conv11" 759 | } 760 | layer { 761 | name: "conv12/dw" 762 | type: "Convolution" 763 | bottom: "conv11" 764 | top: "conv12/dw" 765 | param { 766 | lr_mult: 1.0 767 | decay_mult: 1.0 768 | } 769 | param { 770 | lr_mult: 2.0 771 | decay_mult: 0.0 772 | } 
773 | convolution_param { 774 | num_output: 512 775 | pad: 1 776 | kernel_size: 3 777 | stride: 2 778 | group: 512 779 | engine: CAFFE 780 | weight_filler { 781 | type: "msra" 782 | } 783 | bias_filler { 784 | type: "constant" 785 | value: 0.0 786 | } 787 | } 788 | } 789 | layer { 790 | name: "conv12/dw/relu" 791 | type: "ReLU" 792 | bottom: "conv12/dw" 793 | top: "conv12/dw" 794 | } 795 | layer { 796 | name: "conv12" 797 | type: "Convolution" 798 | bottom: "conv12/dw" 799 | top: "conv12" 800 | param { 801 | lr_mult: 1.0 802 | decay_mult: 1.0 803 | } 804 | param { 805 | lr_mult: 2.0 806 | decay_mult: 0.0 807 | } 808 | convolution_param { 809 | num_output: 1024 810 | kernel_size: 1 811 | weight_filler { 812 | type: "msra" 813 | } 814 | bias_filler { 815 | type: "constant" 816 | value: 0.0 817 | } 818 | } 819 | } 820 | layer { 821 | name: "conv12/relu" 822 | type: "ReLU" 823 | bottom: "conv12" 824 | top: "conv12" 825 | } 826 | layer { 827 | name: "conv13/dw" 828 | type: "Convolution" 829 | bottom: "conv12" 830 | top: "conv13/dw" 831 | param { 832 | lr_mult: 1.0 833 | decay_mult: 1.0 834 | } 835 | param { 836 | lr_mult: 2.0 837 | decay_mult: 0.0 838 | } 839 | convolution_param { 840 | num_output: 1024 841 | pad: 1 842 | kernel_size: 3 843 | group: 1024 844 | engine: CAFFE 845 | weight_filler { 846 | type: "msra" 847 | } 848 | bias_filler { 849 | type: "constant" 850 | value: 0.0 851 | } 852 | } 853 | } 854 | layer { 855 | name: "conv13/dw/relu" 856 | type: "ReLU" 857 | bottom: "conv13/dw" 858 | top: "conv13/dw" 859 | } 860 | layer { 861 | name: "conv13" 862 | type: "Convolution" 863 | bottom: "conv13/dw" 864 | top: "conv13" 865 | param { 866 | lr_mult: 1.0 867 | decay_mult: 1.0 868 | } 869 | param { 870 | lr_mult: 2.0 871 | decay_mult: 0.0 872 | } 873 | convolution_param { 874 | num_output: 1024 875 | kernel_size: 1 876 | weight_filler { 877 | type: "msra" 878 | } 879 | bias_filler { 880 | type: "constant" 881 | value: 0.0 882 | } 883 | } 884 | } 885 | layer { 886 | 
name: "conv13/relu" 887 | type: "ReLU" 888 | bottom: "conv13" 889 | top: "conv13" 890 | } 891 | layer { 892 | name: "conv14_1" 893 | type: "Convolution" 894 | bottom: "conv13" 895 | top: "conv14_1" 896 | param { 897 | lr_mult: 1.0 898 | decay_mult: 1.0 899 | } 900 | param { 901 | lr_mult: 2.0 902 | decay_mult: 0.0 903 | } 904 | convolution_param { 905 | num_output: 256 906 | kernel_size: 1 907 | weight_filler { 908 | type: "msra" 909 | } 910 | bias_filler { 911 | type: "constant" 912 | value: 0.0 913 | } 914 | } 915 | } 916 | layer { 917 | name: "conv14_1/relu" 918 | type: "ReLU" 919 | bottom: "conv14_1" 920 | top: "conv14_1" 921 | } 922 | layer { 923 | name: "conv14_2" 924 | type: "Convolution" 925 | bottom: "conv14_1" 926 | top: "conv14_2" 927 | param { 928 | lr_mult: 1.0 929 | decay_mult: 1.0 930 | } 931 | param { 932 | lr_mult: 2.0 933 | decay_mult: 0.0 934 | } 935 | convolution_param { 936 | num_output: 512 937 | pad: 1 938 | kernel_size: 3 939 | stride: 2 940 | weight_filler { 941 | type: "msra" 942 | } 943 | bias_filler { 944 | type: "constant" 945 | value: 0.0 946 | } 947 | } 948 | } 949 | layer { 950 | name: "conv14_2/relu" 951 | type: "ReLU" 952 | bottom: "conv14_2" 953 | top: "conv14_2" 954 | } 955 | layer { 956 | name: "conv15_1" 957 | type: "Convolution" 958 | bottom: "conv14_2" 959 | top: "conv15_1" 960 | param { 961 | lr_mult: 1.0 962 | decay_mult: 1.0 963 | } 964 | param { 965 | lr_mult: 2.0 966 | decay_mult: 0.0 967 | } 968 | convolution_param { 969 | num_output: 128 970 | kernel_size: 1 971 | weight_filler { 972 | type: "msra" 973 | } 974 | bias_filler { 975 | type: "constant" 976 | value: 0.0 977 | } 978 | } 979 | } 980 | layer { 981 | name: "conv15_1/relu" 982 | type: "ReLU" 983 | bottom: "conv15_1" 984 | top: "conv15_1" 985 | } 986 | layer { 987 | name: "conv15_2" 988 | type: "Convolution" 989 | bottom: "conv15_1" 990 | top: "conv15_2" 991 | param { 992 | lr_mult: 1.0 993 | decay_mult: 1.0 994 | } 995 | param { 996 | lr_mult: 2.0 997 | 
decay_mult: 0.0 998 | } 999 | convolution_param { 1000 | num_output: 256 1001 | pad: 1 1002 | kernel_size: 3 1003 | stride: 2 1004 | weight_filler { 1005 | type: "msra" 1006 | } 1007 | bias_filler { 1008 | type: "constant" 1009 | value: 0.0 1010 | } 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv15_2/relu" 1015 | type: "ReLU" 1016 | bottom: "conv15_2" 1017 | top: "conv15_2" 1018 | } 1019 | layer { 1020 | name: "conv16_1" 1021 | type: "Convolution" 1022 | bottom: "conv15_2" 1023 | top: "conv16_1" 1024 | param { 1025 | lr_mult: 1.0 1026 | decay_mult: 1.0 1027 | } 1028 | param { 1029 | lr_mult: 2.0 1030 | decay_mult: 0.0 1031 | } 1032 | convolution_param { 1033 | num_output: 128 1034 | kernel_size: 1 1035 | weight_filler { 1036 | type: "msra" 1037 | } 1038 | bias_filler { 1039 | type: "constant" 1040 | value: 0.0 1041 | } 1042 | } 1043 | } 1044 | layer { 1045 | name: "conv16_1/relu" 1046 | type: "ReLU" 1047 | bottom: "conv16_1" 1048 | top: "conv16_1" 1049 | } 1050 | layer { 1051 | name: "conv16_2" 1052 | type: "Convolution" 1053 | bottom: "conv16_1" 1054 | top: "conv16_2" 1055 | param { 1056 | lr_mult: 1.0 1057 | decay_mult: 1.0 1058 | } 1059 | param { 1060 | lr_mult: 2.0 1061 | decay_mult: 0.0 1062 | } 1063 | convolution_param { 1064 | num_output: 256 1065 | pad: 1 1066 | kernel_size: 3 1067 | stride: 2 1068 | weight_filler { 1069 | type: "msra" 1070 | } 1071 | bias_filler { 1072 | type: "constant" 1073 | value: 0.0 1074 | } 1075 | } 1076 | } 1077 | layer { 1078 | name: "conv16_2/relu" 1079 | type: "ReLU" 1080 | bottom: "conv16_2" 1081 | top: "conv16_2" 1082 | } 1083 | layer { 1084 | name: "conv17_1" 1085 | type: "Convolution" 1086 | bottom: "conv16_2" 1087 | top: "conv17_1" 1088 | param { 1089 | lr_mult: 1.0 1090 | decay_mult: 1.0 1091 | } 1092 | param { 1093 | lr_mult: 2.0 1094 | decay_mult: 0.0 1095 | } 1096 | convolution_param { 1097 | num_output: 64 1098 | kernel_size: 1 1099 | weight_filler { 1100 | type: "msra" 1101 | } 1102 | bias_filler { 1103 | type: 
"constant" 1104 | value: 0.0 1105 | } 1106 | } 1107 | } 1108 | layer { 1109 | name: "conv17_1/relu" 1110 | type: "ReLU" 1111 | bottom: "conv17_1" 1112 | top: "conv17_1" 1113 | } 1114 | layer { 1115 | name: "conv17_2" 1116 | type: "Convolution" 1117 | bottom: "conv17_1" 1118 | top: "conv17_2" 1119 | param { 1120 | lr_mult: 1.0 1121 | decay_mult: 1.0 1122 | } 1123 | param { 1124 | lr_mult: 2.0 1125 | decay_mult: 0.0 1126 | } 1127 | convolution_param { 1128 | num_output: 128 1129 | pad: 1 1130 | kernel_size: 3 1131 | stride: 2 1132 | weight_filler { 1133 | type: "msra" 1134 | } 1135 | bias_filler { 1136 | type: "constant" 1137 | value: 0.0 1138 | } 1139 | } 1140 | } 1141 | layer { 1142 | name: "conv17_2/relu" 1143 | type: "ReLU" 1144 | bottom: "conv17_2" 1145 | top: "conv17_2" 1146 | } 1147 | layer { 1148 | name: "conv11_mbox_loc" 1149 | type: "Convolution" 1150 | bottom: "conv11" 1151 | top: "conv11_mbox_loc" 1152 | param { 1153 | lr_mult: 1.0 1154 | decay_mult: 1.0 1155 | } 1156 | param { 1157 | lr_mult: 2.0 1158 | decay_mult: 0.0 1159 | } 1160 | convolution_param { 1161 | num_output: 12 1162 | kernel_size: 1 1163 | weight_filler { 1164 | type: "msra" 1165 | } 1166 | bias_filler { 1167 | type: "constant" 1168 | value: 0.0 1169 | } 1170 | } 1171 | } 1172 | layer { 1173 | name: "conv11_mbox_loc_perm" 1174 | type: "Permute" 1175 | bottom: "conv11_mbox_loc" 1176 | top: "conv11_mbox_loc_perm" 1177 | permute_param { 1178 | order: 0 1179 | order: 2 1180 | order: 3 1181 | order: 1 1182 | } 1183 | } 1184 | layer { 1185 | name: "conv11_mbox_loc_flat" 1186 | type: "Flatten" 1187 | bottom: "conv11_mbox_loc_perm" 1188 | top: "conv11_mbox_loc_flat" 1189 | flatten_param { 1190 | axis: 1 1191 | } 1192 | } 1193 | layer { 1194 | name: "conv11_mbox_conf" 1195 | type: "Convolution" 1196 | bottom: "conv11" 1197 | top: "conv11_mbox_conf" 1198 | param { 1199 | lr_mult: 1.0 1200 | decay_mult: 1.0 1201 | } 1202 | param { 1203 | lr_mult: 2.0 1204 | decay_mult: 0.0 1205 | } 1206 | 
convolution_param { 1207 | num_output: 63 1208 | kernel_size: 1 1209 | weight_filler { 1210 | type: "msra" 1211 | } 1212 | bias_filler { 1213 | type: "constant" 1214 | value: 0.0 1215 | } 1216 | } 1217 | } 1218 | layer { 1219 | name: "conv11_mbox_conf_perm" 1220 | type: "Permute" 1221 | bottom: "conv11_mbox_conf" 1222 | top: "conv11_mbox_conf_perm" 1223 | permute_param { 1224 | order: 0 1225 | order: 2 1226 | order: 3 1227 | order: 1 1228 | } 1229 | } 1230 | layer { 1231 | name: "conv11_mbox_conf_flat" 1232 | type: "Flatten" 1233 | bottom: "conv11_mbox_conf_perm" 1234 | top: "conv11_mbox_conf_flat" 1235 | flatten_param { 1236 | axis: 1 1237 | } 1238 | } 1239 | layer { 1240 | name: "conv11_mbox_priorbox" 1241 | type: "PriorBox" 1242 | bottom: "conv11" 1243 | bottom: "data" 1244 | top: "conv11_mbox_priorbox" 1245 | prior_box_param { 1246 | min_size: 60.0 1247 | aspect_ratio: 2.0 1248 | flip: true 1249 | clip: false 1250 | variance: 0.1 1251 | variance: 0.1 1252 | variance: 0.2 1253 | variance: 0.2 1254 | offset: 0.5 1255 | } 1256 | } 1257 | layer { 1258 | name: "conv13_mbox_loc" 1259 | type: "Convolution" 1260 | bottom: "conv13" 1261 | top: "conv13_mbox_loc" 1262 | param { 1263 | lr_mult: 1.0 1264 | decay_mult: 1.0 1265 | } 1266 | param { 1267 | lr_mult: 2.0 1268 | decay_mult: 0.0 1269 | } 1270 | convolution_param { 1271 | num_output: 24 1272 | kernel_size: 1 1273 | weight_filler { 1274 | type: "msra" 1275 | } 1276 | bias_filler { 1277 | type: "constant" 1278 | value: 0.0 1279 | } 1280 | } 1281 | } 1282 | layer { 1283 | name: "conv13_mbox_loc_perm" 1284 | type: "Permute" 1285 | bottom: "conv13_mbox_loc" 1286 | top: "conv13_mbox_loc_perm" 1287 | permute_param { 1288 | order: 0 1289 | order: 2 1290 | order: 3 1291 | order: 1 1292 | } 1293 | } 1294 | layer { 1295 | name: "conv13_mbox_loc_flat" 1296 | type: "Flatten" 1297 | bottom: "conv13_mbox_loc_perm" 1298 | top: "conv13_mbox_loc_flat" 1299 | flatten_param { 1300 | axis: 1 1301 | } 1302 | } 1303 | layer { 1304 | name: 
"conv13_mbox_conf" 1305 | type: "Convolution" 1306 | bottom: "conv13" 1307 | top: "conv13_mbox_conf" 1308 | param { 1309 | lr_mult: 1.0 1310 | decay_mult: 1.0 1311 | } 1312 | param { 1313 | lr_mult: 2.0 1314 | decay_mult: 0.0 1315 | } 1316 | convolution_param { 1317 | num_output: 126 1318 | kernel_size: 1 1319 | weight_filler { 1320 | type: "msra" 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.0 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "conv13_mbox_conf_perm" 1330 | type: "Permute" 1331 | bottom: "conv13_mbox_conf" 1332 | top: "conv13_mbox_conf_perm" 1333 | permute_param { 1334 | order: 0 1335 | order: 2 1336 | order: 3 1337 | order: 1 1338 | } 1339 | } 1340 | layer { 1341 | name: "conv13_mbox_conf_flat" 1342 | type: "Flatten" 1343 | bottom: "conv13_mbox_conf_perm" 1344 | top: "conv13_mbox_conf_flat" 1345 | flatten_param { 1346 | axis: 1 1347 | } 1348 | } 1349 | layer { 1350 | name: "conv13_mbox_priorbox" 1351 | type: "PriorBox" 1352 | bottom: "conv13" 1353 | bottom: "data" 1354 | top: "conv13_mbox_priorbox" 1355 | prior_box_param { 1356 | min_size: 105.0 1357 | max_size: 150.0 1358 | aspect_ratio: 2.0 1359 | aspect_ratio: 3.0 1360 | flip: true 1361 | clip: false 1362 | variance: 0.1 1363 | variance: 0.1 1364 | variance: 0.2 1365 | variance: 0.2 1366 | offset: 0.5 1367 | } 1368 | } 1369 | layer { 1370 | name: "conv14_2_mbox_loc" 1371 | type: "Convolution" 1372 | bottom: "conv14_2" 1373 | top: "conv14_2_mbox_loc" 1374 | param { 1375 | lr_mult: 1.0 1376 | decay_mult: 1.0 1377 | } 1378 | param { 1379 | lr_mult: 2.0 1380 | decay_mult: 0.0 1381 | } 1382 | convolution_param { 1383 | num_output: 24 1384 | kernel_size: 1 1385 | weight_filler { 1386 | type: "msra" 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.0 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "conv14_2_mbox_loc_perm" 1396 | type: "Permute" 1397 | bottom: "conv14_2_mbox_loc" 1398 | top: "conv14_2_mbox_loc_perm" 1399 | permute_param { 1400 | 
order: 0 1401 | order: 2 1402 | order: 3 1403 | order: 1 1404 | } 1405 | } 1406 | layer { 1407 | name: "conv14_2_mbox_loc_flat" 1408 | type: "Flatten" 1409 | bottom: "conv14_2_mbox_loc_perm" 1410 | top: "conv14_2_mbox_loc_flat" 1411 | flatten_param { 1412 | axis: 1 1413 | } 1414 | } 1415 | layer { 1416 | name: "conv14_2_mbox_conf" 1417 | type: "Convolution" 1418 | bottom: "conv14_2" 1419 | top: "conv14_2_mbox_conf" 1420 | param { 1421 | lr_mult: 1.0 1422 | decay_mult: 1.0 1423 | } 1424 | param { 1425 | lr_mult: 2.0 1426 | decay_mult: 0.0 1427 | } 1428 | convolution_param { 1429 | num_output: 126 1430 | kernel_size: 1 1431 | weight_filler { 1432 | type: "msra" 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | value: 0.0 1437 | } 1438 | } 1439 | } 1440 | layer { 1441 | name: "conv14_2_mbox_conf_perm" 1442 | type: "Permute" 1443 | bottom: "conv14_2_mbox_conf" 1444 | top: "conv14_2_mbox_conf_perm" 1445 | permute_param { 1446 | order: 0 1447 | order: 2 1448 | order: 3 1449 | order: 1 1450 | } 1451 | } 1452 | layer { 1453 | name: "conv14_2_mbox_conf_flat" 1454 | type: "Flatten" 1455 | bottom: "conv14_2_mbox_conf_perm" 1456 | top: "conv14_2_mbox_conf_flat" 1457 | flatten_param { 1458 | axis: 1 1459 | } 1460 | } 1461 | layer { 1462 | name: "conv14_2_mbox_priorbox" 1463 | type: "PriorBox" 1464 | bottom: "conv14_2" 1465 | bottom: "data" 1466 | top: "conv14_2_mbox_priorbox" 1467 | prior_box_param { 1468 | min_size: 150.0 1469 | max_size: 195.0 1470 | aspect_ratio: 2.0 1471 | aspect_ratio: 3.0 1472 | flip: true 1473 | clip: false 1474 | variance: 0.1 1475 | variance: 0.1 1476 | variance: 0.2 1477 | variance: 0.2 1478 | offset: 0.5 1479 | } 1480 | } 1481 | layer { 1482 | name: "conv15_2_mbox_loc" 1483 | type: "Convolution" 1484 | bottom: "conv15_2" 1485 | top: "conv15_2_mbox_loc" 1486 | param { 1487 | lr_mult: 1.0 1488 | decay_mult: 1.0 1489 | } 1490 | param { 1491 | lr_mult: 2.0 1492 | decay_mult: 0.0 1493 | } 1494 | convolution_param { 1495 | num_output: 24 1496 | 
kernel_size: 1 1497 | weight_filler { 1498 | type: "msra" 1499 | } 1500 | bias_filler { 1501 | type: "constant" 1502 | value: 0.0 1503 | } 1504 | } 1505 | } 1506 | layer { 1507 | name: "conv15_2_mbox_loc_perm" 1508 | type: "Permute" 1509 | bottom: "conv15_2_mbox_loc" 1510 | top: "conv15_2_mbox_loc_perm" 1511 | permute_param { 1512 | order: 0 1513 | order: 2 1514 | order: 3 1515 | order: 1 1516 | } 1517 | } 1518 | layer { 1519 | name: "conv15_2_mbox_loc_flat" 1520 | type: "Flatten" 1521 | bottom: "conv15_2_mbox_loc_perm" 1522 | top: "conv15_2_mbox_loc_flat" 1523 | flatten_param { 1524 | axis: 1 1525 | } 1526 | } 1527 | layer { 1528 | name: "conv15_2_mbox_conf" 1529 | type: "Convolution" 1530 | bottom: "conv15_2" 1531 | top: "conv15_2_mbox_conf" 1532 | param { 1533 | lr_mult: 1.0 1534 | decay_mult: 1.0 1535 | } 1536 | param { 1537 | lr_mult: 2.0 1538 | decay_mult: 0.0 1539 | } 1540 | convolution_param { 1541 | num_output: 126 1542 | kernel_size: 1 1543 | weight_filler { 1544 | type: "msra" 1545 | } 1546 | bias_filler { 1547 | type: "constant" 1548 | value: 0.0 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "conv15_2_mbox_conf_perm" 1554 | type: "Permute" 1555 | bottom: "conv15_2_mbox_conf" 1556 | top: "conv15_2_mbox_conf_perm" 1557 | permute_param { 1558 | order: 0 1559 | order: 2 1560 | order: 3 1561 | order: 1 1562 | } 1563 | } 1564 | layer { 1565 | name: "conv15_2_mbox_conf_flat" 1566 | type: "Flatten" 1567 | bottom: "conv15_2_mbox_conf_perm" 1568 | top: "conv15_2_mbox_conf_flat" 1569 | flatten_param { 1570 | axis: 1 1571 | } 1572 | } 1573 | layer { 1574 | name: "conv15_2_mbox_priorbox" 1575 | type: "PriorBox" 1576 | bottom: "conv15_2" 1577 | bottom: "data" 1578 | top: "conv15_2_mbox_priorbox" 1579 | prior_box_param { 1580 | min_size: 195.0 1581 | max_size: 240.0 1582 | aspect_ratio: 2.0 1583 | aspect_ratio: 3.0 1584 | flip: true 1585 | clip: false 1586 | variance: 0.1 1587 | variance: 0.1 1588 | variance: 0.2 1589 | variance: 0.2 1590 | offset: 0.5 1591 | 
} 1592 | } 1593 | layer { 1594 | name: "conv16_2_mbox_loc" 1595 | type: "Convolution" 1596 | bottom: "conv16_2" 1597 | top: "conv16_2_mbox_loc" 1598 | param { 1599 | lr_mult: 1.0 1600 | decay_mult: 1.0 1601 | } 1602 | param { 1603 | lr_mult: 2.0 1604 | decay_mult: 0.0 1605 | } 1606 | convolution_param { 1607 | num_output: 24 1608 | kernel_size: 1 1609 | weight_filler { 1610 | type: "msra" 1611 | } 1612 | bias_filler { 1613 | type: "constant" 1614 | value: 0.0 1615 | } 1616 | } 1617 | } 1618 | layer { 1619 | name: "conv16_2_mbox_loc_perm" 1620 | type: "Permute" 1621 | bottom: "conv16_2_mbox_loc" 1622 | top: "conv16_2_mbox_loc_perm" 1623 | permute_param { 1624 | order: 0 1625 | order: 2 1626 | order: 3 1627 | order: 1 1628 | } 1629 | } 1630 | layer { 1631 | name: "conv16_2_mbox_loc_flat" 1632 | type: "Flatten" 1633 | bottom: "conv16_2_mbox_loc_perm" 1634 | top: "conv16_2_mbox_loc_flat" 1635 | flatten_param { 1636 | axis: 1 1637 | } 1638 | } 1639 | layer { 1640 | name: "conv16_2_mbox_conf" 1641 | type: "Convolution" 1642 | bottom: "conv16_2" 1643 | top: "conv16_2_mbox_conf" 1644 | param { 1645 | lr_mult: 1.0 1646 | decay_mult: 1.0 1647 | } 1648 | param { 1649 | lr_mult: 2.0 1650 | decay_mult: 0.0 1651 | } 1652 | convolution_param { 1653 | num_output: 126 1654 | kernel_size: 1 1655 | weight_filler { 1656 | type: "msra" 1657 | } 1658 | bias_filler { 1659 | type: "constant" 1660 | value: 0.0 1661 | } 1662 | } 1663 | } 1664 | layer { 1665 | name: "conv16_2_mbox_conf_perm" 1666 | type: "Permute" 1667 | bottom: "conv16_2_mbox_conf" 1668 | top: "conv16_2_mbox_conf_perm" 1669 | permute_param { 1670 | order: 0 1671 | order: 2 1672 | order: 3 1673 | order: 1 1674 | } 1675 | } 1676 | layer { 1677 | name: "conv16_2_mbox_conf_flat" 1678 | type: "Flatten" 1679 | bottom: "conv16_2_mbox_conf_perm" 1680 | top: "conv16_2_mbox_conf_flat" 1681 | flatten_param { 1682 | axis: 1 1683 | } 1684 | } 1685 | layer { 1686 | name: "conv16_2_mbox_priorbox" 1687 | type: "PriorBox" 1688 | bottom: 
"conv16_2" 1689 | bottom: "data" 1690 | top: "conv16_2_mbox_priorbox" 1691 | prior_box_param { 1692 | min_size: 240.0 1693 | max_size: 285.0 1694 | aspect_ratio: 2.0 1695 | aspect_ratio: 3.0 1696 | flip: true 1697 | clip: false 1698 | variance: 0.1 1699 | variance: 0.1 1700 | variance: 0.2 1701 | variance: 0.2 1702 | offset: 0.5 1703 | } 1704 | } 1705 | layer { 1706 | name: "conv17_2_mbox_loc" 1707 | type: "Convolution" 1708 | bottom: "conv17_2" 1709 | top: "conv17_2_mbox_loc" 1710 | param { 1711 | lr_mult: 1.0 1712 | decay_mult: 1.0 1713 | } 1714 | param { 1715 | lr_mult: 2.0 1716 | decay_mult: 0.0 1717 | } 1718 | convolution_param { 1719 | num_output: 24 1720 | kernel_size: 1 1721 | weight_filler { 1722 | type: "msra" 1723 | } 1724 | bias_filler { 1725 | type: "constant" 1726 | value: 0.0 1727 | } 1728 | } 1729 | } 1730 | layer { 1731 | name: "conv17_2_mbox_loc_perm" 1732 | type: "Permute" 1733 | bottom: "conv17_2_mbox_loc" 1734 | top: "conv17_2_mbox_loc_perm" 1735 | permute_param { 1736 | order: 0 1737 | order: 2 1738 | order: 3 1739 | order: 1 1740 | } 1741 | } 1742 | layer { 1743 | name: "conv17_2_mbox_loc_flat" 1744 | type: "Flatten" 1745 | bottom: "conv17_2_mbox_loc_perm" 1746 | top: "conv17_2_mbox_loc_flat" 1747 | flatten_param { 1748 | axis: 1 1749 | } 1750 | } 1751 | layer { 1752 | name: "conv17_2_mbox_conf" 1753 | type: "Convolution" 1754 | bottom: "conv17_2" 1755 | top: "conv17_2_mbox_conf" 1756 | param { 1757 | lr_mult: 1.0 1758 | decay_mult: 1.0 1759 | } 1760 | param { 1761 | lr_mult: 2.0 1762 | decay_mult: 0.0 1763 | } 1764 | convolution_param { 1765 | num_output: 126 1766 | kernel_size: 1 1767 | weight_filler { 1768 | type: "msra" 1769 | } 1770 | bias_filler { 1771 | type: "constant" 1772 | value: 0.0 1773 | } 1774 | } 1775 | } 1776 | layer { 1777 | name: "conv17_2_mbox_conf_perm" 1778 | type: "Permute" 1779 | bottom: "conv17_2_mbox_conf" 1780 | top: "conv17_2_mbox_conf_perm" 1781 | permute_param { 1782 | order: 0 1783 | order: 2 1784 | order: 3 
1785 | order: 1 1786 | } 1787 | } 1788 | layer { 1789 | name: "conv17_2_mbox_conf_flat" 1790 | type: "Flatten" 1791 | bottom: "conv17_2_mbox_conf_perm" 1792 | top: "conv17_2_mbox_conf_flat" 1793 | flatten_param { 1794 | axis: 1 1795 | } 1796 | } 1797 | layer { 1798 | name: "conv17_2_mbox_priorbox" 1799 | type: "PriorBox" 1800 | bottom: "conv17_2" 1801 | bottom: "data" 1802 | top: "conv17_2_mbox_priorbox" 1803 | prior_box_param { 1804 | min_size: 285.0 1805 | max_size: 300.0 1806 | aspect_ratio: 2.0 1807 | aspect_ratio: 3.0 1808 | flip: true 1809 | clip: false 1810 | variance: 0.1 1811 | variance: 0.1 1812 | variance: 0.2 1813 | variance: 0.2 1814 | offset: 0.5 1815 | } 1816 | } 1817 | layer { 1818 | name: "mbox_loc" 1819 | type: "Concat" 1820 | bottom: "conv11_mbox_loc_flat" 1821 | bottom: "conv13_mbox_loc_flat" 1822 | bottom: "conv14_2_mbox_loc_flat" 1823 | bottom: "conv15_2_mbox_loc_flat" 1824 | bottom: "conv16_2_mbox_loc_flat" 1825 | bottom: "conv17_2_mbox_loc_flat" 1826 | top: "mbox_loc" 1827 | concat_param { 1828 | axis: 1 1829 | } 1830 | } 1831 | layer { 1832 | name: "mbox_conf" 1833 | type: "Concat" 1834 | bottom: "conv11_mbox_conf_flat" 1835 | bottom: "conv13_mbox_conf_flat" 1836 | bottom: "conv14_2_mbox_conf_flat" 1837 | bottom: "conv15_2_mbox_conf_flat" 1838 | bottom: "conv16_2_mbox_conf_flat" 1839 | bottom: "conv17_2_mbox_conf_flat" 1840 | top: "mbox_conf" 1841 | concat_param { 1842 | axis: 1 1843 | } 1844 | } 1845 | layer { 1846 | name: "mbox_priorbox" 1847 | type: "Concat" 1848 | bottom: "conv11_mbox_priorbox" 1849 | bottom: "conv13_mbox_priorbox" 1850 | bottom: "conv14_2_mbox_priorbox" 1851 | bottom: "conv15_2_mbox_priorbox" 1852 | bottom: "conv16_2_mbox_priorbox" 1853 | bottom: "conv17_2_mbox_priorbox" 1854 | top: "mbox_priorbox" 1855 | concat_param { 1856 | axis: 2 1857 | } 1858 | } 1859 | layer { 1860 | name: "mbox_conf_reshape" 1861 | type: "Reshape" 1862 | bottom: "mbox_conf" 1863 | top: "mbox_conf_reshape" 1864 | reshape_param { 1865 | shape { 
1866 | dim: 0 1867 | dim: -1 1868 | dim: 21 1869 | } 1870 | } 1871 | } 1872 | layer { 1873 | name: "mbox_conf_softmax" 1874 | type: "Softmax" 1875 | bottom: "mbox_conf_reshape" 1876 | top: "mbox_conf_softmax" 1877 | softmax_param { 1878 | axis: 2 1879 | } 1880 | } 1881 | layer { 1882 | name: "mbox_conf_flatten" 1883 | type: "Flatten" 1884 | bottom: "mbox_conf_softmax" 1885 | top: "mbox_conf_flatten" 1886 | flatten_param { 1887 | axis: 1 1888 | } 1889 | } 1890 | layer { 1891 | name: "detection_out" 1892 | type: "DetectionOutput" 1893 | bottom: "mbox_loc" 1894 | bottom: "mbox_conf_flatten" 1895 | bottom: "mbox_priorbox" 1896 | top: "detection_out" 1897 | include { 1898 | phase: TEST 1899 | } 1900 | detection_output_param { 1901 | num_classes: 21 1902 | share_location: true 1903 | background_label_id: 0 1904 | nms_param { 1905 | nms_threshold: 0.45 1906 | top_k: 100 1907 | } 1908 | code_type: CENTER_SIZE 1909 | keep_top_k: 100 1910 | confidence_threshold: 0.25 1911 | } 1912 | } 1913 | -------------------------------------------------------------------------------- /SSD/MobileNet/MobileNetSSD_deploy_custom.prototxt: -------------------------------------------------------------------------------- 1 | name: "MobileNet-SSD" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 300 7 | dim: 300 8 | } 9 | layer { 10 | name: "conv0" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv0" 14 | param { 15 | lr_mult: 1.0 16 | decay_mult: 1.0 17 | } 18 | convolution_param { 19 | num_output: 32 20 | bias_term: false 21 | pad: 1 22 | kernel_size: 3 23 | stride: 2 24 | weight_filler { 25 | type: "msra" 26 | } 27 | bias_filler { 28 | type: "constant" 29 | value: 0.0 30 | } 31 | } 32 | } 33 | layer { 34 | name: "conv0/relu" 35 | type: "ReLU" 36 | bottom: "conv0" 37 | top: "conv0" 38 | } 39 | layer { 40 | name: "conv1/dw" 41 | type: "Convolution" 42 | bottom: "conv0" 43 | top: "conv1/dw" 44 | param { 45 | lr_mult: 1.0 46 | decay_mult: 1.0 47 | } 48 | 
convolution_param { 49 | num_output: 32 50 | bias_term: false 51 | pad: 1 52 | kernel_size: 3 53 | group: 32 54 | engine: CAFFE 55 | weight_filler { 56 | type: "msra" 57 | } 58 | bias_filler { 59 | type: "constant" 60 | value: 0.0 61 | } 62 | } 63 | } 64 | layer { 65 | name: "conv1/dw/relu" 66 | type: "ReLU" 67 | bottom: "conv1/dw" 68 | top: "conv1/dw" 69 | } 70 | layer { 71 | name: "conv1" 72 | type: "Convolution" 73 | bottom: "conv1/dw" 74 | top: "conv1" 75 | param { 76 | lr_mult: 1.0 77 | decay_mult: 1.0 78 | } 79 | convolution_param { 80 | num_output: 64 81 | bias_term: false 82 | kernel_size: 1 83 | weight_filler { 84 | type: "msra" 85 | } 86 | bias_filler { 87 | type: "constant" 88 | value: 0.0 89 | } 90 | } 91 | } 92 | layer { 93 | name: "conv1/relu" 94 | type: "ReLU" 95 | bottom: "conv1" 96 | top: "conv1" 97 | } 98 | layer { 99 | name: "conv2/dw" 100 | type: "Convolution" 101 | bottom: "conv1" 102 | top: "conv2/dw" 103 | param { 104 | lr_mult: 1.0 105 | decay_mult: 1.0 106 | } 107 | convolution_param { 108 | num_output: 64 109 | bias_term: false 110 | pad: 1 111 | kernel_size: 3 112 | stride: 2 113 | group: 64 114 | engine: CAFFE 115 | weight_filler { 116 | type: "msra" 117 | } 118 | bias_filler { 119 | type: "constant" 120 | value: 0.0 121 | } 122 | } 123 | } 124 | layer { 125 | name: "conv2/dw/relu" 126 | type: "ReLU" 127 | bottom: "conv2/dw" 128 | top: "conv2/dw" 129 | } 130 | layer { 131 | name: "conv2" 132 | type: "Convolution" 133 | bottom: "conv2/dw" 134 | top: "conv2" 135 | param { 136 | lr_mult: 1.0 137 | decay_mult: 1.0 138 | } 139 | convolution_param { 140 | num_output: 128 141 | bias_term: false 142 | kernel_size: 1 143 | weight_filler { 144 | type: "msra" 145 | } 146 | bias_filler { 147 | type: "constant" 148 | value: 0.0 149 | } 150 | } 151 | } 152 | layer { 153 | name: "conv2/relu" 154 | type: "ReLU" 155 | bottom: "conv2" 156 | top: "conv2" 157 | } 158 | layer { 159 | name: "conv3/dw" 160 | type: "Convolution" 161 | bottom: "conv2" 162 | top: 
"conv3/dw" 163 | param { 164 | lr_mult: 1.0 165 | decay_mult: 1.0 166 | } 167 | convolution_param { 168 | num_output: 128 169 | bias_term: false 170 | pad: 1 171 | kernel_size: 3 172 | group: 128 173 | engine: CAFFE 174 | weight_filler { 175 | type: "msra" 176 | } 177 | bias_filler { 178 | type: "constant" 179 | value: 0.0 180 | } 181 | } 182 | } 183 | layer { 184 | name: "conv3/dw/relu" 185 | type: "ReLU" 186 | bottom: "conv3/dw" 187 | top: "conv3/dw" 188 | } 189 | layer { 190 | name: "conv3" 191 | type: "Convolution" 192 | bottom: "conv3/dw" 193 | top: "conv3" 194 | param { 195 | lr_mult: 1.0 196 | decay_mult: 1.0 197 | } 198 | convolution_param { 199 | num_output: 128 200 | bias_term: false 201 | kernel_size: 1 202 | weight_filler { 203 | type: "msra" 204 | } 205 | bias_filler { 206 | type: "constant" 207 | value: 0.0 208 | } 209 | } 210 | } 211 | layer { 212 | name: "conv3/relu" 213 | type: "ReLU" 214 | bottom: "conv3" 215 | top: "conv3" 216 | } 217 | layer { 218 | name: "conv4/dw" 219 | type: "Convolution" 220 | bottom: "conv3" 221 | top: "conv4/dw" 222 | param { 223 | lr_mult: 1.0 224 | decay_mult: 1.0 225 | } 226 | convolution_param { 227 | num_output: 128 228 | bias_term: false 229 | pad: 1 230 | kernel_size: 3 231 | stride: 2 232 | group: 128 233 | engine: CAFFE 234 | weight_filler { 235 | type: "msra" 236 | } 237 | bias_filler { 238 | type: "constant" 239 | value: 0.0 240 | } 241 | } 242 | } 243 | layer { 244 | name: "conv4/dw/relu" 245 | type: "ReLU" 246 | bottom: "conv4/dw" 247 | top: "conv4/dw" 248 | } 249 | layer { 250 | name: "conv4" 251 | type: "Convolution" 252 | bottom: "conv4/dw" 253 | top: "conv4" 254 | param { 255 | lr_mult: 1.0 256 | decay_mult: 1.0 257 | } 258 | convolution_param { 259 | num_output: 256 260 | bias_term: false 261 | kernel_size: 1 262 | weight_filler { 263 | type: "msra" 264 | } 265 | bias_filler { 266 | type: "constant" 267 | value: 0.0 268 | } 269 | } 270 | } 271 | layer { 272 | name: "conv4/relu" 273 | type: "ReLU" 274 | 
bottom: "conv4" 275 | top: "conv4" 276 | } 277 | layer { 278 | name: "conv5/dw" 279 | type: "Convolution" 280 | bottom: "conv4" 281 | top: "conv5/dw" 282 | param { 283 | lr_mult: 1.0 284 | decay_mult: 1.0 285 | } 286 | convolution_param { 287 | num_output: 256 288 | bias_term: false 289 | pad: 1 290 | kernel_size: 3 291 | group: 256 292 | engine: CAFFE 293 | weight_filler { 294 | type: "msra" 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0.0 299 | } 300 | } 301 | } 302 | layer { 303 | name: "conv5/dw/relu" 304 | type: "ReLU" 305 | bottom: "conv5/dw" 306 | top: "conv5/dw" 307 | } 308 | layer { 309 | name: "conv5" 310 | type: "Convolution" 311 | bottom: "conv5/dw" 312 | top: "conv5" 313 | param { 314 | lr_mult: 1.0 315 | decay_mult: 1.0 316 | } 317 | convolution_param { 318 | num_output: 256 319 | bias_term: false 320 | kernel_size: 1 321 | weight_filler { 322 | type: "msra" 323 | } 324 | bias_filler { 325 | type: "constant" 326 | value: 0.0 327 | } 328 | } 329 | } 330 | layer { 331 | name: "conv5/relu" 332 | type: "ReLU" 333 | bottom: "conv5" 334 | top: "conv5" 335 | } 336 | layer { 337 | name: "conv6/dw" 338 | type: "Convolution" 339 | bottom: "conv5" 340 | top: "conv6/dw" 341 | param { 342 | lr_mult: 1.0 343 | decay_mult: 1.0 344 | } 345 | convolution_param { 346 | num_output: 256 347 | bias_term: false 348 | pad: 1 349 | kernel_size: 3 350 | stride: 2 351 | group: 256 352 | engine: CAFFE 353 | weight_filler { 354 | type: "msra" 355 | } 356 | bias_filler { 357 | type: "constant" 358 | value: 0.0 359 | } 360 | } 361 | } 362 | layer { 363 | name: "conv6/dw/relu" 364 | type: "ReLU" 365 | bottom: "conv6/dw" 366 | top: "conv6/dw" 367 | } 368 | layer { 369 | name: "conv6" 370 | type: "Convolution" 371 | bottom: "conv6/dw" 372 | top: "conv6" 373 | param { 374 | lr_mult: 1.0 375 | decay_mult: 1.0 376 | } 377 | convolution_param { 378 | num_output: 512 379 | bias_term: false 380 | kernel_size: 1 381 | weight_filler { 382 | type: "msra" 383 | } 384 | 
bias_filler { 385 | type: "constant" 386 | value: 0.0 387 | } 388 | } 389 | } 390 | layer { 391 | name: "conv6/relu" 392 | type: "ReLU" 393 | bottom: "conv6" 394 | top: "conv6" 395 | } 396 | layer { 397 | name: "conv7/dw" 398 | type: "Convolution" 399 | bottom: "conv6" 400 | top: "conv7/dw" 401 | param { 402 | lr_mult: 1.0 403 | decay_mult: 1.0 404 | } 405 | convolution_param { 406 | num_output: 512 407 | bias_term: false 408 | pad: 1 409 | kernel_size: 3 410 | group: 512 411 | engine: CAFFE 412 | weight_filler { 413 | type: "msra" 414 | } 415 | bias_filler { 416 | type: "constant" 417 | value: 0.0 418 | } 419 | } 420 | } 421 | layer { 422 | name: "conv7/dw/relu" 423 | type: "ReLU" 424 | bottom: "conv7/dw" 425 | top: "conv7/dw" 426 | } 427 | layer { 428 | name: "conv7" 429 | type: "Convolution" 430 | bottom: "conv7/dw" 431 | top: "conv7" 432 | param { 433 | lr_mult: 1.0 434 | decay_mult: 1.0 435 | } 436 | convolution_param { 437 | num_output: 512 438 | bias_term: false 439 | kernel_size: 1 440 | weight_filler { 441 | type: "msra" 442 | } 443 | bias_filler { 444 | type: "constant" 445 | value: 0.0 446 | } 447 | } 448 | } 449 | layer { 450 | name: "conv7/relu" 451 | type: "ReLU" 452 | bottom: "conv7" 453 | top: "conv7" 454 | } 455 | layer { 456 | name: "conv8/dw" 457 | type: "Convolution" 458 | bottom: "conv7" 459 | top: "conv8/dw" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1.0 463 | } 464 | convolution_param { 465 | num_output: 512 466 | bias_term: false 467 | pad: 1 468 | kernel_size: 3 469 | group: 512 470 | engine: CAFFE 471 | weight_filler { 472 | type: "msra" 473 | } 474 | bias_filler { 475 | type: "constant" 476 | value: 0.0 477 | } 478 | } 479 | } 480 | layer { 481 | name: "conv8/dw/relu" 482 | type: "ReLU" 483 | bottom: "conv8/dw" 484 | top: "conv8/dw" 485 | } 486 | layer { 487 | name: "conv8" 488 | type: "Convolution" 489 | bottom: "conv8/dw" 490 | top: "conv8" 491 | param { 492 | lr_mult: 1.0 493 | decay_mult: 1.0 494 | } 495 | convolution_param { 
496 | num_output: 512 497 | bias_term: false 498 | kernel_size: 1 499 | weight_filler { 500 | type: "msra" 501 | } 502 | bias_filler { 503 | type: "constant" 504 | value: 0.0 505 | } 506 | } 507 | } 508 | layer { 509 | name: "conv8/relu" 510 | type: "ReLU" 511 | bottom: "conv8" 512 | top: "conv8" 513 | } 514 | layer { 515 | name: "conv9/dw" 516 | type: "Convolution" 517 | bottom: "conv8" 518 | top: "conv9/dw" 519 | param { 520 | lr_mult: 1.0 521 | decay_mult: 1.0 522 | } 523 | convolution_param { 524 | num_output: 512 525 | bias_term: false 526 | pad: 1 527 | kernel_size: 3 528 | group: 512 529 | engine: CAFFE 530 | weight_filler { 531 | type: "msra" 532 | } 533 | bias_filler { 534 | type: "constant" 535 | value: 0.0 536 | } 537 | } 538 | } 539 | layer { 540 | name: "conv9/dw/relu" 541 | type: "ReLU" 542 | bottom: "conv9/dw" 543 | top: "conv9/dw" 544 | } 545 | layer { 546 | name: "conv9" 547 | type: "Convolution" 548 | bottom: "conv9/dw" 549 | top: "conv9" 550 | param { 551 | lr_mult: 1.0 552 | decay_mult: 1.0 553 | } 554 | convolution_param { 555 | num_output: 512 556 | bias_term: false 557 | kernel_size: 1 558 | weight_filler { 559 | type: "msra" 560 | } 561 | bias_filler { 562 | type: "constant" 563 | value: 0.0 564 | } 565 | } 566 | } 567 | layer { 568 | name: "conv9/relu" 569 | type: "ReLU" 570 | bottom: "conv9" 571 | top: "conv9" 572 | } 573 | layer { 574 | name: "conv10/dw" 575 | type: "Convolution" 576 | bottom: "conv9" 577 | top: "conv10/dw" 578 | param { 579 | lr_mult: 1.0 580 | decay_mult: 1.0 581 | } 582 | convolution_param { 583 | num_output: 512 584 | bias_term: false 585 | pad: 1 586 | kernel_size: 3 587 | group: 512 588 | engine: CAFFE 589 | weight_filler { 590 | type: "msra" 591 | } 592 | bias_filler { 593 | type: "constant" 594 | value: 0.0 595 | } 596 | } 597 | } 598 | layer { 599 | name: "conv10/dw/relu" 600 | type: "ReLU" 601 | bottom: "conv10/dw" 602 | top: "conv10/dw" 603 | } 604 | layer { 605 | name: "conv10" 606 | type: "Convolution" 607 | 
bottom: "conv10/dw" 608 | top: "conv10" 609 | param { 610 | lr_mult: 1.0 611 | decay_mult: 1.0 612 | } 613 | convolution_param { 614 | num_output: 512 615 | bias_term: false 616 | kernel_size: 1 617 | weight_filler { 618 | type: "msra" 619 | } 620 | bias_filler { 621 | type: "constant" 622 | value: 0.0 623 | } 624 | } 625 | } 626 | layer { 627 | name: "conv10/relu" 628 | type: "ReLU" 629 | bottom: "conv10" 630 | top: "conv10" 631 | } 632 | layer { 633 | name: "conv11/dw" 634 | type: "Convolution" 635 | bottom: "conv10" 636 | top: "conv11/dw" 637 | param { 638 | lr_mult: 1.0 639 | decay_mult: 1.0 640 | } 641 | convolution_param { 642 | num_output: 512 643 | bias_term: false 644 | pad: 1 645 | kernel_size: 3 646 | group: 512 647 | engine: CAFFE 648 | weight_filler { 649 | type: "msra" 650 | } 651 | bias_filler { 652 | type: "constant" 653 | value: 0.0 654 | } 655 | } 656 | } 657 | layer { 658 | name: "conv11/dw/relu" 659 | type: "ReLU" 660 | bottom: "conv11/dw" 661 | top: "conv11/dw" 662 | } 663 | layer { 664 | name: "conv11" 665 | type: "Convolution" 666 | bottom: "conv11/dw" 667 | top: "conv11" 668 | param { 669 | lr_mult: 1.0 670 | decay_mult: 1.0 671 | } 672 | convolution_param { 673 | num_output: 512 674 | bias_term: false 675 | kernel_size: 1 676 | weight_filler { 677 | type: "msra" 678 | } 679 | bias_filler { 680 | type: "constant" 681 | value: 0.0 682 | } 683 | } 684 | } 685 | layer { 686 | name: "conv11/relu" 687 | type: "ReLU" 688 | bottom: "conv11" 689 | top: "conv11" 690 | } 691 | layer { 692 | name: "conv12/dw" 693 | type: "Convolution" 694 | bottom: "conv11" 695 | top: "conv12/dw" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | convolution_param { 701 | num_output: 512 702 | bias_term: false 703 | pad: 1 704 | kernel_size: 3 705 | stride: 2 706 | group: 512 707 | engine: CAFFE 708 | weight_filler { 709 | type: "msra" 710 | } 711 | bias_filler { 712 | type: "constant" 713 | value: 0.0 714 | } 715 | } 716 | } 717 | layer { 718 | name: 
"conv12/dw/relu" 719 | type: "ReLU" 720 | bottom: "conv12/dw" 721 | top: "conv12/dw" 722 | } 723 | layer { 724 | name: "conv12" 725 | type: "Convolution" 726 | bottom: "conv12/dw" 727 | top: "conv12" 728 | param { 729 | lr_mult: 1.0 730 | decay_mult: 1.0 731 | } 732 | convolution_param { 733 | num_output: 1024 734 | bias_term: false 735 | kernel_size: 1 736 | weight_filler { 737 | type: "msra" 738 | } 739 | bias_filler { 740 | type: "constant" 741 | value: 0.0 742 | } 743 | } 744 | } 745 | layer { 746 | name: "conv12/relu" 747 | type: "ReLU" 748 | bottom: "conv12" 749 | top: "conv12" 750 | } 751 | layer { 752 | name: "conv13/dw" 753 | type: "Convolution" 754 | bottom: "conv12" 755 | top: "conv13/dw" 756 | param { 757 | lr_mult: 1.0 758 | decay_mult: 1.0 759 | } 760 | convolution_param { 761 | num_output: 1024 762 | bias_term: false 763 | pad: 1 764 | kernel_size: 3 765 | group: 1024 766 | engine: CAFFE 767 | weight_filler { 768 | type: "msra" 769 | } 770 | bias_filler { 771 | type: "constant" 772 | value: 0.0 773 | } 774 | } 775 | } 776 | layer { 777 | name: "conv13/dw/relu" 778 | type: "ReLU" 779 | bottom: "conv13/dw" 780 | top: "conv13/dw" 781 | } 782 | layer { 783 | name: "conv13" 784 | type: "Convolution" 785 | bottom: "conv13/dw" 786 | top: "conv13" 787 | param { 788 | lr_mult: 1.0 789 | decay_mult: 1.0 790 | } 791 | convolution_param { 792 | num_output: 1024 793 | bias_term: false 794 | kernel_size: 1 795 | weight_filler { 796 | type: "msra" 797 | } 798 | bias_filler { 799 | type: "constant" 800 | value: 0.0 801 | } 802 | } 803 | } 804 | layer { 805 | name: "conv13/relu" 806 | type: "ReLU" 807 | bottom: "conv13" 808 | top: "conv13" 809 | } 810 | layer { 811 | name: "conv14_1" 812 | type: "Convolution" 813 | bottom: "conv13" 814 | top: "conv14_1" 815 | param { 816 | lr_mult: 1.0 817 | decay_mult: 1.0 818 | } 819 | convolution_param { 820 | num_output: 256 821 | bias_term: false 822 | kernel_size: 1 823 | weight_filler { 824 | type: "msra" 825 | } 826 | 
bias_filler { 827 | type: "constant" 828 | value: 0.0 829 | } 830 | } 831 | } 832 | layer { 833 | name: "conv14_1/relu" 834 | type: "ReLU" 835 | bottom: "conv14_1" 836 | top: "conv14_1" 837 | } 838 | layer { 839 | name: "conv14_2" 840 | type: "Convolution" 841 | bottom: "conv14_1" 842 | top: "conv14_2" 843 | param { 844 | lr_mult: 1.0 845 | decay_mult: 1.0 846 | } 847 | convolution_param { 848 | num_output: 512 849 | bias_term: false 850 | pad: 1 851 | kernel_size: 3 852 | stride: 2 853 | weight_filler { 854 | type: "msra" 855 | } 856 | bias_filler { 857 | type: "constant" 858 | value: 0.0 859 | } 860 | } 861 | } 862 | layer { 863 | name: "conv14_2/relu" 864 | type: "ReLU" 865 | bottom: "conv14_2" 866 | top: "conv14_2" 867 | } 868 | layer { 869 | name: "conv15_1" 870 | type: "Convolution" 871 | bottom: "conv14_2" 872 | top: "conv15_1" 873 | param { 874 | lr_mult: 1.0 875 | decay_mult: 1.0 876 | } 877 | convolution_param { 878 | num_output: 128 879 | bias_term: false 880 | kernel_size: 1 881 | weight_filler { 882 | type: "msra" 883 | } 884 | bias_filler { 885 | type: "constant" 886 | value: 0.0 887 | } 888 | } 889 | } 890 | layer { 891 | name: "conv15_1/relu" 892 | type: "ReLU" 893 | bottom: "conv15_1" 894 | top: "conv15_1" 895 | } 896 | layer { 897 | name: "conv15_2" 898 | type: "Convolution" 899 | bottom: "conv15_1" 900 | top: "conv15_2" 901 | param { 902 | lr_mult: 1.0 903 | decay_mult: 1.0 904 | } 905 | convolution_param { 906 | num_output: 256 907 | bias_term: false 908 | pad: 1 909 | kernel_size: 3 910 | stride: 2 911 | weight_filler { 912 | type: "msra" 913 | } 914 | bias_filler { 915 | type: "constant" 916 | value: 0.0 917 | } 918 | } 919 | } 920 | layer { 921 | name: "conv15_2/relu" 922 | type: "ReLU" 923 | bottom: "conv15_2" 924 | top: "conv15_2" 925 | } 926 | layer { 927 | name: "conv16_1" 928 | type: "Convolution" 929 | bottom: "conv15_2" 930 | top: "conv16_1" 931 | param { 932 | lr_mult: 1.0 933 | decay_mult: 1.0 934 | } 935 | convolution_param { 936 | 
num_output: 128 937 | bias_term: false 938 | kernel_size: 1 939 | weight_filler { 940 | type: "msra" 941 | } 942 | bias_filler { 943 | type: "constant" 944 | value: 0.0 945 | } 946 | } 947 | } 948 | layer { 949 | name: "conv16_1/relu" 950 | type: "ReLU" 951 | bottom: "conv16_1" 952 | top: "conv16_1" 953 | } 954 | layer { 955 | name: "conv16_2" 956 | type: "Convolution" 957 | bottom: "conv16_1" 958 | top: "conv16_2" 959 | param { 960 | lr_mult: 1.0 961 | decay_mult: 1.0 962 | } 963 | convolution_param { 964 | num_output: 256 965 | bias_term: false 966 | pad: 1 967 | kernel_size: 3 968 | stride: 2 969 | weight_filler { 970 | type: "msra" 971 | } 972 | bias_filler { 973 | type: "constant" 974 | value: 0.0 975 | } 976 | } 977 | } 978 | layer { 979 | name: "conv16_2/relu" 980 | type: "ReLU" 981 | bottom: "conv16_2" 982 | top: "conv16_2" 983 | } 984 | layer { 985 | name: "conv17_1" 986 | type: "Convolution" 987 | bottom: "conv16_2" 988 | top: "conv17_1" 989 | param { 990 | lr_mult: 1.0 991 | decay_mult: 1.0 992 | } 993 | convolution_param { 994 | num_output: 64 995 | bias_term: false 996 | kernel_size: 1 997 | weight_filler { 998 | type: "msra" 999 | } 1000 | bias_filler { 1001 | type: "constant" 1002 | value: 0.0 1003 | } 1004 | } 1005 | } 1006 | layer { 1007 | name: "conv17_1/relu" 1008 | type: "ReLU" 1009 | bottom: "conv17_1" 1010 | top: "conv17_1" 1011 | } 1012 | layer { 1013 | name: "conv17_2" 1014 | type: "Convolution" 1015 | bottom: "conv17_1" 1016 | top: "conv17_2" 1017 | param { 1018 | lr_mult: 1.0 1019 | decay_mult: 1.0 1020 | } 1021 | convolution_param { 1022 | num_output: 128 1023 | bias_term: false 1024 | pad: 1 1025 | kernel_size: 3 1026 | stride: 2 1027 | weight_filler { 1028 | type: "msra" 1029 | } 1030 | bias_filler { 1031 | type: "constant" 1032 | value: 0.0 1033 | } 1034 | } 1035 | } 1036 | layer { 1037 | name: "conv17_2/relu" 1038 | type: "ReLU" 1039 | bottom: "conv17_2" 1040 | top: "conv17_2" 1041 | } 1042 | layer { 1043 | name: "conv11_mbox_loc" 
1044 | type: "Convolution" 1045 | bottom: "conv11" 1046 | top: "conv11_mbox_loc" 1047 | param { 1048 | lr_mult: 1.0 1049 | decay_mult: 1.0 1050 | } 1051 | param { 1052 | lr_mult: 2.0 1053 | decay_mult: 0.0 1054 | } 1055 | convolution_param { 1056 | num_output: 12 1057 | kernel_size: 1 1058 | weight_filler { 1059 | type: "msra" 1060 | } 1061 | bias_filler { 1062 | type: "constant" 1063 | value: 0.0 1064 | } 1065 | } 1066 | } 1067 | layer { 1068 | name: "conv11_mbox_loc_perm" 1069 | type: "Permute" 1070 | bottom: "conv11_mbox_loc" 1071 | top: "conv11_mbox_loc_perm" 1072 | permute_param { 1073 | order: 0 1074 | order: 2 1075 | order: 3 1076 | order: 1 1077 | } 1078 | } 1079 | layer { 1080 | name: "conv11_mbox_loc_flat" 1081 | type: "Flatten" 1082 | bottom: "conv11_mbox_loc_perm" 1083 | top: "conv11_mbox_loc_flat" 1084 | flatten_param { 1085 | axis: 1 1086 | } 1087 | } 1088 | layer { 1089 | name: "conv11_mbox_conf_new" 1090 | type: "Convolution" 1091 | bottom: "conv11" 1092 | top: "conv11_mbox_conf" 1093 | param { 1094 | lr_mult: 1.0 1095 | decay_mult: 1.0 1096 | } 1097 | param { 1098 | lr_mult: 2.0 1099 | decay_mult: 0.0 1100 | } 1101 | convolution_param { 1102 | num_output: 18 1103 | kernel_size: 1 1104 | weight_filler { 1105 | type: "msra" 1106 | } 1107 | bias_filler { 1108 | type: "constant" 1109 | value: 0.0 1110 | } 1111 | } 1112 | } 1113 | layer { 1114 | name: "conv11_mbox_conf_perm" 1115 | type: "Permute" 1116 | bottom: "conv11_mbox_conf" 1117 | top: "conv11_mbox_conf_perm" 1118 | permute_param { 1119 | order: 0 1120 | order: 2 1121 | order: 3 1122 | order: 1 1123 | } 1124 | } 1125 | layer { 1126 | name: "conv11_mbox_conf_flat" 1127 | type: "Flatten" 1128 | bottom: "conv11_mbox_conf_perm" 1129 | top: "conv11_mbox_conf_flat" 1130 | flatten_param { 1131 | axis: 1 1132 | } 1133 | } 1134 | layer { 1135 | name: "conv11_mbox_priorbox" 1136 | type: "PriorBox" 1137 | bottom: "conv11" 1138 | bottom: "data" 1139 | top: "conv11_mbox_priorbox" 1140 | prior_box_param { 1141 
| min_size: 30.0 1142 | aspect_ratio: 2.0 1143 | flip: true 1144 | clip: false 1145 | variance: 0.1 1146 | variance: 0.1 1147 | variance: 0.2 1148 | variance: 0.2 1149 | offset: 0.5 1150 | } 1151 | } 1152 | layer { 1153 | name: "conv13_mbox_loc" 1154 | type: "Convolution" 1155 | bottom: "conv13" 1156 | top: "conv13_mbox_loc" 1157 | param { 1158 | lr_mult: 1.0 1159 | decay_mult: 1.0 1160 | } 1161 | param { 1162 | lr_mult: 2.0 1163 | decay_mult: 0.0 1164 | } 1165 | convolution_param { 1166 | num_output: 24 1167 | kernel_size: 1 1168 | weight_filler { 1169 | type: "msra" 1170 | } 1171 | bias_filler { 1172 | type: "constant" 1173 | value: 0.0 1174 | } 1175 | } 1176 | } 1177 | layer { 1178 | name: "conv13_mbox_loc_perm" 1179 | type: "Permute" 1180 | bottom: "conv13_mbox_loc" 1181 | top: "conv13_mbox_loc_perm" 1182 | permute_param { 1183 | order: 0 1184 | order: 2 1185 | order: 3 1186 | order: 1 1187 | } 1188 | } 1189 | layer { 1190 | name: "conv13_mbox_loc_flat" 1191 | type: "Flatten" 1192 | bottom: "conv13_mbox_loc_perm" 1193 | top: "conv13_mbox_loc_flat" 1194 | flatten_param { 1195 | axis: 1 1196 | } 1197 | } 1198 | layer { 1199 | name: "conv13_mbox_conf_new" 1200 | type: "Convolution" 1201 | bottom: "conv13" 1202 | top: "conv13_mbox_conf" 1203 | param { 1204 | lr_mult: 1.0 1205 | decay_mult: 1.0 1206 | } 1207 | param { 1208 | lr_mult: 2.0 1209 | decay_mult: 0.0 1210 | } 1211 | convolution_param { 1212 | num_output: 36 1213 | kernel_size: 1 1214 | weight_filler { 1215 | type: "msra" 1216 | } 1217 | bias_filler { 1218 | type: "constant" 1219 | value: 0.0 1220 | } 1221 | } 1222 | } 1223 | layer { 1224 | name: "conv13_mbox_conf_perm" 1225 | type: "Permute" 1226 | bottom: "conv13_mbox_conf" 1227 | top: "conv13_mbox_conf_perm" 1228 | permute_param { 1229 | order: 0 1230 | order: 2 1231 | order: 3 1232 | order: 1 1233 | } 1234 | } 1235 | layer { 1236 | name: "conv13_mbox_conf_flat" 1237 | type: "Flatten" 1238 | bottom: "conv13_mbox_conf_perm" 1239 | top: 
"conv13_mbox_conf_flat" 1240 | flatten_param { 1241 | axis: 1 1242 | } 1243 | } 1244 | layer { 1245 | name: "conv13_mbox_priorbox" 1246 | type: "PriorBox" 1247 | bottom: "conv13" 1248 | bottom: "data" 1249 | top: "conv13_mbox_priorbox" 1250 | prior_box_param { 1251 | min_size: 60.0 1252 | max_size: 100.0 1253 | aspect_ratio: 2.0 1254 | aspect_ratio: 3.0 1255 | flip: true 1256 | clip: false 1257 | variance: 0.1 1258 | variance: 0.1 1259 | variance: 0.2 1260 | variance: 0.2 1261 | offset: 0.5 1262 | } 1263 | } 1264 | layer { 1265 | name: "conv14_2_mbox_loc" 1266 | type: "Convolution" 1267 | bottom: "conv14_2" 1268 | top: "conv14_2_mbox_loc" 1269 | param { 1270 | lr_mult: 1.0 1271 | decay_mult: 1.0 1272 | } 1273 | param { 1274 | lr_mult: 2.0 1275 | decay_mult: 0.0 1276 | } 1277 | convolution_param { 1278 | num_output: 24 1279 | kernel_size: 1 1280 | weight_filler { 1281 | type: "msra" 1282 | } 1283 | bias_filler { 1284 | type: "constant" 1285 | value: 0.0 1286 | } 1287 | } 1288 | } 1289 | layer { 1290 | name: "conv14_2_mbox_loc_perm" 1291 | type: "Permute" 1292 | bottom: "conv14_2_mbox_loc" 1293 | top: "conv14_2_mbox_loc_perm" 1294 | permute_param { 1295 | order: 0 1296 | order: 2 1297 | order: 3 1298 | order: 1 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv14_2_mbox_loc_flat" 1303 | type: "Flatten" 1304 | bottom: "conv14_2_mbox_loc_perm" 1305 | top: "conv14_2_mbox_loc_flat" 1306 | flatten_param { 1307 | axis: 1 1308 | } 1309 | } 1310 | layer { 1311 | name: "conv14_2_mbox_conf_new" 1312 | type: "Convolution" 1313 | bottom: "conv14_2" 1314 | top: "conv14_2_mbox_conf" 1315 | param { 1316 | lr_mult: 1.0 1317 | decay_mult: 1.0 1318 | } 1319 | param { 1320 | lr_mult: 2.0 1321 | decay_mult: 0.0 1322 | } 1323 | convolution_param { 1324 | num_output: 36 1325 | kernel_size: 1 1326 | weight_filler { 1327 | type: "msra" 1328 | } 1329 | bias_filler { 1330 | type: "constant" 1331 | value: 0.0 1332 | } 1333 | } 1334 | } 1335 | layer { 1336 | name: "conv14_2_mbox_conf_perm" 
1337 | type: "Permute" 1338 | bottom: "conv14_2_mbox_conf" 1339 | top: "conv14_2_mbox_conf_perm" 1340 | permute_param { 1341 | order: 0 1342 | order: 2 1343 | order: 3 1344 | order: 1 1345 | } 1346 | } 1347 | layer { 1348 | name: "conv14_2_mbox_conf_flat" 1349 | type: "Flatten" 1350 | bottom: "conv14_2_mbox_conf_perm" 1351 | top: "conv14_2_mbox_conf_flat" 1352 | flatten_param { 1353 | axis: 1 1354 | } 1355 | } 1356 | layer { 1357 | name: "conv14_2_mbox_priorbox" 1358 | type: "PriorBox" 1359 | bottom: "conv14_2" 1360 | bottom: "data" 1361 | top: "conv14_2_mbox_priorbox" 1362 | prior_box_param { 1363 | min_size: 100.0 1364 | max_size: 140.0 1365 | aspect_ratio: 2.0 1366 | aspect_ratio: 3.0 1367 | flip: true 1368 | clip: false 1369 | variance: 0.1 1370 | variance: 0.1 1371 | variance: 0.2 1372 | variance: 0.2 1373 | offset: 0.5 1374 | } 1375 | } 1376 | layer { 1377 | name: "conv15_2_mbox_loc" 1378 | type: "Convolution" 1379 | bottom: "conv15_2" 1380 | top: "conv15_2_mbox_loc" 1381 | param { 1382 | lr_mult: 1.0 1383 | decay_mult: 1.0 1384 | } 1385 | param { 1386 | lr_mult: 2.0 1387 | decay_mult: 0.0 1388 | } 1389 | convolution_param { 1390 | num_output: 24 1391 | kernel_size: 1 1392 | weight_filler { 1393 | type: "msra" 1394 | } 1395 | bias_filler { 1396 | type: "constant" 1397 | value: 0.0 1398 | } 1399 | } 1400 | } 1401 | layer { 1402 | name: "conv15_2_mbox_loc_perm" 1403 | type: "Permute" 1404 | bottom: "conv15_2_mbox_loc" 1405 | top: "conv15_2_mbox_loc_perm" 1406 | permute_param { 1407 | order: 0 1408 | order: 2 1409 | order: 3 1410 | order: 1 1411 | } 1412 | } 1413 | layer { 1414 | name: "conv15_2_mbox_loc_flat" 1415 | type: "Flatten" 1416 | bottom: "conv15_2_mbox_loc_perm" 1417 | top: "conv15_2_mbox_loc_flat" 1418 | flatten_param { 1419 | axis: 1 1420 | } 1421 | } 1422 | layer { 1423 | name: "conv15_2_mbox_conf_new" 1424 | type: "Convolution" 1425 | bottom: "conv15_2" 1426 | top: "conv15_2_mbox_conf" 1427 | param { 1428 | lr_mult: 1.0 1429 | decay_mult: 1.0 1430 
| } 1431 | param { 1432 | lr_mult: 2.0 1433 | decay_mult: 0.0 1434 | } 1435 | convolution_param { 1436 | num_output: 36 1437 | kernel_size: 1 1438 | weight_filler { 1439 | type: "msra" 1440 | } 1441 | bias_filler { 1442 | type: "constant" 1443 | value: 0.0 1444 | } 1445 | } 1446 | } 1447 | layer { 1448 | name: "conv15_2_mbox_conf_perm" 1449 | type: "Permute" 1450 | bottom: "conv15_2_mbox_conf" 1451 | top: "conv15_2_mbox_conf_perm" 1452 | permute_param { 1453 | order: 0 1454 | order: 2 1455 | order: 3 1456 | order: 1 1457 | } 1458 | } 1459 | layer { 1460 | name: "conv15_2_mbox_conf_flat" 1461 | type: "Flatten" 1462 | bottom: "conv15_2_mbox_conf_perm" 1463 | top: "conv15_2_mbox_conf_flat" 1464 | flatten_param { 1465 | axis: 1 1466 | } 1467 | } 1468 | layer { 1469 | name: "conv15_2_mbox_priorbox" 1470 | type: "PriorBox" 1471 | bottom: "conv15_2" 1472 | bottom: "data" 1473 | top: "conv15_2_mbox_priorbox" 1474 | prior_box_param { 1475 | min_size: 140.0 1476 | max_size: 200.0 1477 | aspect_ratio: 2.0 1478 | aspect_ratio: 3.0 1479 | flip: true 1480 | clip: false 1481 | variance: 0.1 1482 | variance: 0.1 1483 | variance: 0.2 1484 | variance: 0.2 1485 | offset: 0.5 1486 | } 1487 | } 1488 | layer { 1489 | name: "conv16_2_mbox_loc" 1490 | type: "Convolution" 1491 | bottom: "conv16_2" 1492 | top: "conv16_2_mbox_loc" 1493 | param { 1494 | lr_mult: 1.0 1495 | decay_mult: 1.0 1496 | } 1497 | param { 1498 | lr_mult: 2.0 1499 | decay_mult: 0.0 1500 | } 1501 | convolution_param { 1502 | num_output: 24 1503 | kernel_size: 1 1504 | weight_filler { 1505 | type: "msra" 1506 | } 1507 | bias_filler { 1508 | type: "constant" 1509 | value: 0.0 1510 | } 1511 | } 1512 | } 1513 | layer { 1514 | name: "conv16_2_mbox_loc_perm" 1515 | type: "Permute" 1516 | bottom: "conv16_2_mbox_loc" 1517 | top: "conv16_2_mbox_loc_perm" 1518 | permute_param { 1519 | order: 0 1520 | order: 2 1521 | order: 3 1522 | order: 1 1523 | } 1524 | } 1525 | layer { 1526 | name: "conv16_2_mbox_loc_flat" 1527 | type: 
"Flatten" 1528 | bottom: "conv16_2_mbox_loc_perm" 1529 | top: "conv16_2_mbox_loc_flat" 1530 | flatten_param { 1531 | axis: 1 1532 | } 1533 | } 1534 | layer { 1535 | name: "conv16_2_mbox_conf_new" 1536 | type: "Convolution" 1537 | bottom: "conv16_2" 1538 | top: "conv16_2_mbox_conf" 1539 | param { 1540 | lr_mult: 1.0 1541 | decay_mult: 1.0 1542 | } 1543 | param { 1544 | lr_mult: 2.0 1545 | decay_mult: 0.0 1546 | } 1547 | convolution_param { 1548 | num_output: 36 1549 | kernel_size: 1 1550 | weight_filler { 1551 | type: "msra" 1552 | } 1553 | bias_filler { 1554 | type: "constant" 1555 | value: 0.0 1556 | } 1557 | } 1558 | } 1559 | layer { 1560 | name: "conv16_2_mbox_conf_perm" 1561 | type: "Permute" 1562 | bottom: "conv16_2_mbox_conf" 1563 | top: "conv16_2_mbox_conf_perm" 1564 | permute_param { 1565 | order: 0 1566 | order: 2 1567 | order: 3 1568 | order: 1 1569 | } 1570 | } 1571 | layer { 1572 | name: "conv16_2_mbox_conf_flat" 1573 | type: "Flatten" 1574 | bottom: "conv16_2_mbox_conf_perm" 1575 | top: "conv16_2_mbox_conf_flat" 1576 | flatten_param { 1577 | axis: 1 1578 | } 1579 | } 1580 | layer { 1581 | name: "conv16_2_mbox_priorbox" 1582 | type: "PriorBox" 1583 | bottom: "conv16_2" 1584 | bottom: "data" 1585 | top: "conv16_2_mbox_priorbox" 1586 | prior_box_param { 1587 | min_size: 200.0 1588 | max_size: 240.0 1589 | aspect_ratio: 2.0 1590 | aspect_ratio: 3.0 1591 | flip: true 1592 | clip: false 1593 | variance: 0.1 1594 | variance: 0.1 1595 | variance: 0.2 1596 | variance: 0.2 1597 | offset: 0.5 1598 | } 1599 | } 1600 | layer { 1601 | name: "conv17_2_mbox_loc" 1602 | type: "Convolution" 1603 | bottom: "conv17_2" 1604 | top: "conv17_2_mbox_loc" 1605 | param { 1606 | lr_mult: 1.0 1607 | decay_mult: 1.0 1608 | } 1609 | param { 1610 | lr_mult: 2.0 1611 | decay_mult: 0.0 1612 | } 1613 | convolution_param { 1614 | num_output: 24 1615 | kernel_size: 1 1616 | weight_filler { 1617 | type: "msra" 1618 | } 1619 | bias_filler { 1620 | type: "constant" 1621 | value: 0.0 1622 | } 
1623 | } 1624 | } 1625 | layer { 1626 | name: "conv17_2_mbox_loc_perm" 1627 | type: "Permute" 1628 | bottom: "conv17_2_mbox_loc" 1629 | top: "conv17_2_mbox_loc_perm" 1630 | permute_param { 1631 | order: 0 1632 | order: 2 1633 | order: 3 1634 | order: 1 1635 | } 1636 | } 1637 | layer { 1638 | name: "conv17_2_mbox_loc_flat" 1639 | type: "Flatten" 1640 | bottom: "conv17_2_mbox_loc_perm" 1641 | top: "conv17_2_mbox_loc_flat" 1642 | flatten_param { 1643 | axis: 1 1644 | } 1645 | } 1646 | layer { 1647 | name: "conv17_2_mbox_conf_new" 1648 | type: "Convolution" 1649 | bottom: "conv17_2" 1650 | top: "conv17_2_mbox_conf" 1651 | param { 1652 | lr_mult: 1.0 1653 | decay_mult: 1.0 1654 | } 1655 | param { 1656 | lr_mult: 2.0 1657 | decay_mult: 0.0 1658 | } 1659 | convolution_param { 1660 | num_output: 36 1661 | kernel_size: 1 1662 | weight_filler { 1663 | type: "msra" 1664 | } 1665 | bias_filler { 1666 | type: "constant" 1667 | value: 0.0 1668 | } 1669 | } 1670 | } 1671 | layer { 1672 | name: "conv17_2_mbox_conf_perm" 1673 | type: "Permute" 1674 | bottom: "conv17_2_mbox_conf" 1675 | top: "conv17_2_mbox_conf_perm" 1676 | permute_param { 1677 | order: 0 1678 | order: 2 1679 | order: 3 1680 | order: 1 1681 | } 1682 | } 1683 | layer { 1684 | name: "conv17_2_mbox_conf_flat" 1685 | type: "Flatten" 1686 | bottom: "conv17_2_mbox_conf_perm" 1687 | top: "conv17_2_mbox_conf_flat" 1688 | flatten_param { 1689 | axis: 1 1690 | } 1691 | } 1692 | layer { 1693 | name: "conv17_2_mbox_priorbox" 1694 | type: "PriorBox" 1695 | bottom: "conv17_2" 1696 | bottom: "data" 1697 | top: "conv17_2_mbox_priorbox" 1698 | prior_box_param { 1699 | min_size: 240.0 1700 | max_size: 300.0 1701 | aspect_ratio: 2.0 1702 | aspect_ratio: 3.0 1703 | flip: true 1704 | clip: false 1705 | variance: 0.1 1706 | variance: 0.1 1707 | variance: 0.2 1708 | variance: 0.2 1709 | offset: 0.5 1710 | } 1711 | } 1712 | layer { 1713 | name: "mbox_loc" 1714 | type: "Concat" 1715 | bottom: "conv11_mbox_loc_flat" 1716 | bottom: 
"conv13_mbox_loc_flat" 1717 | bottom: "conv14_2_mbox_loc_flat" 1718 | bottom: "conv15_2_mbox_loc_flat" 1719 | bottom: "conv16_2_mbox_loc_flat" 1720 | bottom: "conv17_2_mbox_loc_flat" 1721 | top: "mbox_loc" 1722 | concat_param { 1723 | axis: 1 1724 | } 1725 | } 1726 | layer { 1727 | name: "mbox_conf" 1728 | type: "Concat" 1729 | bottom: "conv11_mbox_conf_flat" 1730 | bottom: "conv13_mbox_conf_flat" 1731 | bottom: "conv14_2_mbox_conf_flat" 1732 | bottom: "conv15_2_mbox_conf_flat" 1733 | bottom: "conv16_2_mbox_conf_flat" 1734 | bottom: "conv17_2_mbox_conf_flat" 1735 | top: "mbox_conf" 1736 | concat_param { 1737 | axis: 1 1738 | } 1739 | } 1740 | layer { 1741 | name: "mbox_priorbox" 1742 | type: "Concat" 1743 | bottom: "conv11_mbox_priorbox" 1744 | bottom: "conv13_mbox_priorbox" 1745 | bottom: "conv14_2_mbox_priorbox" 1746 | bottom: "conv15_2_mbox_priorbox" 1747 | bottom: "conv16_2_mbox_priorbox" 1748 | bottom: "conv17_2_mbox_priorbox" 1749 | top: "mbox_priorbox" 1750 | concat_param { 1751 | axis: 2 1752 | } 1753 | } 1754 | layer { 1755 | name: "mbox_conf_reshape" 1756 | type: "Reshape" 1757 | bottom: "mbox_conf" 1758 | top: "mbox_conf_reshape" 1759 | reshape_param { 1760 | shape { 1761 | dim: 0 1762 | dim: -1 1763 | dim: 6 1764 | } 1765 | } 1766 | } 1767 | layer { 1768 | name: "mbox_conf_softmax" 1769 | type: "Softmax" 1770 | bottom: "mbox_conf_reshape" 1771 | top: "mbox_conf_softmax" 1772 | softmax_param { 1773 | axis: 2 1774 | } 1775 | } 1776 | layer { 1777 | name: "mbox_conf_flatten" 1778 | type: "Flatten" 1779 | bottom: "mbox_conf_softmax" 1780 | top: "mbox_conf_flatten" 1781 | flatten_param { 1782 | axis: 1 1783 | } 1784 | } 1785 | layer { 1786 | name: "detection_out" 1787 | type: "DetectionOutput" 1788 | bottom: "mbox_loc" 1789 | bottom: "mbox_conf_flatten" 1790 | bottom: "mbox_priorbox" 1791 | top: "detection_out" 1792 | include { 1793 | phase: TEST 1794 | } 1795 | detection_output_param { 1796 | num_classes: 6 1797 | share_location: true 1798 | 
background_label_id: 0 1799 | nms_param { 1800 | nms_threshold: 0.45 1801 | top_k: 100 1802 | } 1803 | code_type: CENTER_SIZE 1804 | keep_top_k: 100 1805 | confidence_threshold: 0.25 1806 | } 1807 | } 1808 | -------------------------------------------------------------------------------- /SSD/MobileNet/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/MobileNet/train.prototxt" 2 | test_net: "models/MobileNet/test.prototxt" 3 | test_iter: 673 4 | test_interval: 10000 5 | base_lr: 0.0005 6 | display: 10 7 | max_iter: 120000 8 | lr_policy: "multistep" 9 | gamma: 0.5 10 | weight_decay: 0.00005 11 | snapshot: 1000 12 | snapshot_prefix: "models/MobileNet/MobileNetSSD_deploy" 13 | solver_mode: CPU 14 | debug_info: false 15 | snapshot_after_train: true 16 | test_initialization: false 17 | average_loss: 10 18 | stepvalue: 20000 19 | stepvalue: 40000 20 | iter_size: 1 21 | type: "RMSProp" 22 | eval_type: "detection" 23 | ap_version: "11point" -------------------------------------------------------------------------------- /SSD/MobileNet/solver_test.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/MobileNet/train.prototxt" 2 | test_net: "models/MobileNet/test.prototxt" 3 | test_iter: 673 4 | test_interval: 10000 5 | base_lr: 0.0005 6 | display: 10 7 | max_iter: 0 8 | lr_policy: "multistep" 9 | gamma: 0.5 10 | weight_decay: 0.00005 11 | snapshot: 0 12 | snapshot_prefix: "snapshot/mobilenet" 13 | solver_mode: CPU 14 | debug_info: false 15 | snapshot_after_train: false 16 | test_initialization: true 17 | average_loss: 10 18 | stepvalue: 20000 19 | stepvalue: 40000 20 | iter_size: 1 21 | type: "RMSProp" 22 | eval_type: "detection" 23 | ap_version: "11point" -------------------------------------------------------------------------------- /SSD/MobileNet_V2/solver.prototxt: 
-------------------------------------------------------------------------------- 1 | train_net: "models/MobileNetV2/train_voc.prototxt" 2 | #test_net: "models/MobileNetV2/test.prototxt" 3 | #test_iter: 673 4 | #test_interval: 10000 5 | base_lr: 0.0005 6 | display: 10 7 | max_iter: 50000 8 | lr_policy: "multistep" 9 | gamma: 0.5 10 | weight_decay: 0.000004 11 | snapshot: 1000 12 | snapshot_prefix: "models/MobileNetV2/MobileNetSSD_deploy" 13 | solver_mode: GPU 14 | debug_info: false 15 | snapshot_after_train: true 16 | test_initialization: false 17 | average_loss: 10 18 | stepvalue: 8000 19 | stepvalue: 16000 20 | stepvalue: 32000 21 | iter_size: 1 22 | type: "RMSProp" 23 | eval_type: "detection" 24 | ap_version: "11point" -------------------------------------------------------------------------------- /YOLO/voc.data: -------------------------------------------------------------------------------- 1 | classes= 5 2 | train = data/voc/2017_trainval.txt 3 | valid = data/voc/2017_test.txt 4 | names = data/voc.names 5 | backup = backup 6 | 7 | -------------------------------------------------------------------------------- /YOLO/voc.names: -------------------------------------------------------------------------------- 1 | bicycle 2 | car 3 | motorbike 4 | person 5 | cones -------------------------------------------------------------------------------- /YOLO/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=4 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 50020 21 | policy=steps 22 | steps=40000,45000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 
34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=30 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | #anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | anchors = 5,7, 11,13, 18,29, 40,41, 119,148, 289,253 136 | classes=5 137 | #classes=80 138 | num=6 139 | jitter=.3 140 | ignore_thresh = .7 141 | truth_thresh = 1 142 | random=1 143 | 144 | [route] 145 | layers = -4 146 | 147 | [convolutional] 148 | batch_normalize=1 149 | filters=128 150 | size=1 151 | stride=1 152 | pad=1 153 | activation=leaky 154 | 155 | [upsample] 156 | stride=2 157 | 158 | [route] 159 | layers = -1, 8 160 | 161 | [convolutional] 162 | 
batch_normalize=1 163 | filters=256 164 | size=3 165 | stride=1 166 | pad=1 167 | activation=leaky 168 | 169 | [convolutional] 170 | size=1 171 | stride=1 172 | pad=1 173 | filters=30 174 | activation=linear 175 | 176 | [yolo] 177 | mask = 0,1,2 178 | #anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 179 | #anchors = 5,7, 7,5, 18,29, 29,18,37,58,58,37,81,82, 135,169, 344,319 180 | anchors = 5,7, 11,13, 18,29, 40,41, 119,148, 289,253 181 | classes=5 182 | num=6 183 | jitter=.3 184 | ignore_thresh = .7 185 | truth_thresh = 1 186 | random=1 187 | -------------------------------------------------------------------------------- /YOLO/yolov3-tiny_final.weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Vehicle-Detection/caedb24b289b1c4774b85ecc15f60cf6b040bec6/YOLO/yolov3-tiny_final.weights -------------------------------------------------------------------------------- /YOLO/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 50020 21 | policy=steps 22 | steps=40000,45000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | 
from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | 
filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | 
activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | 
activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | 
stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=30 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | #anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -- NOTE(review): this head sets num=9 and mask = 6,7,8, but the active anchors line below lists only 6 pairs (indices 0-5), so the masked indices have no listed anchor; confirm whether 9 pairs were intended 610 | anchors = 5,7, 11,13, 18,29, 40,41, 119,148, 289,253 611 | classes=5 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .7 615 | truth_thresh = 1 616 | random=1 617 | 618 | 619 | [route] 620 | layers = -4 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=256 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [upsample] 631 | stride=2 632 | 633 | [route] 634 | layers = -1, 61 635 | 636 | 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=256 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | size=3 649 | stride=1 650 | pad=1 651 | filters=512 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | size=3 665 | stride=1 666 | pad=1 667 | filters=512 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 
674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | size=1 688 | stride=1 689 | pad=1 690 | filters=30 691 | activation=linear 692 | 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | #anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 697 | anchors = 5,7, 11,13, 18,29, 40,41, 119,148, 289,253 698 | classes=5 699 | num=9 700 | jitter=.3 701 | ignore_thresh = .7 702 | truth_thresh = 1 703 | random=1 704 | 705 | 706 | 707 | [route] 708 | layers = -4 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | filters=128 713 | size=1 714 | stride=1 715 | pad=1 716 | activation=leaky 717 | 718 | [upsample] 719 | stride=2 720 | 721 | [route] 722 | layers = -1, 36 723 | 724 | 725 | 726 | [convolutional] 727 | batch_normalize=1 728 | filters=128 729 | size=1 730 | stride=1 731 | pad=1 732 | activation=leaky 733 | 734 | [convolutional] 735 | batch_normalize=1 736 | size=3 737 | stride=1 738 | pad=1 739 | filters=256 740 | activation=leaky 741 | 742 | [convolutional] 743 | batch_normalize=1 744 | filters=128 745 | size=1 746 | stride=1 747 | pad=1 748 | activation=leaky 749 | 750 | [convolutional] 751 | batch_normalize=1 752 | size=3 753 | stride=1 754 | pad=1 755 | filters=256 756 | activation=leaky 757 | 758 | [convolutional] 759 | batch_normalize=1 760 | filters=128 761 | size=1 762 | stride=1 763 | pad=1 764 | activation=leaky 765 | 766 | [convolutional] 767 | batch_normalize=1 768 | size=3 769 | stride=1 770 | pad=1 771 | filters=256 772 | activation=leaky 773 | 774 | [convolutional] 775 | size=1 776 | stride=1 777 | pad=1 778 | filters=30 779 | activation=linear 780 | 781 | 782 | [yolo] 783 | mask = 0,1,2 784 | #anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 785 | anchors = 5,7, 11,13, 18,29, 40,41, 119,148, 289,253 786 | classes=5 787 | num=9 788 | 
jitter=.3 789 | ignore_thresh = .7 790 | truth_thresh = 1 791 | random=1 792 | 793 | -------------------------------------------------------------------------------- /faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "../../models/intel_optimized_models/faster-rcnn/pascal_voc/VGG19/faster_rcnn_end2end/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 50000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg19_faster_rcnn" 16 | iter_size: 2 17 | -------------------------------------------------------------------------------- /faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_19_layer" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 672 8 | dim: 672 9 | } 10 | 11 | input: "im_info" 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | bottom: "data" 19 | top: "conv1_1" 20 | name: "conv1_1" 21 | type: "Convolution" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | bottom: "conv1_1" 38 | top: "conv1_1" 39 | name: "relu1_1" 40 | type: "ReLU" 41 | } 42 | layer { 43 | bottom: "conv1_1" 44 | top: "conv1_2" 45 | name: "conv1_2" 46 | type: "Convolution" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | 
bottom: "conv1_2" 63 | top: "conv1_2" 64 | name: "relu1_2" 65 | type: "ReLU" 66 | } 67 | layer { 68 | bottom: "conv1_2" 69 | top: "pool1" 70 | name: "pool1" 71 | type: "Pooling" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | bottom: "pool1" 80 | top: "conv2_1" 81 | name: "conv2_1" 82 | type: "Convolution" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | bottom: "conv2_1" 99 | top: "conv2_1" 100 | name: "relu2_1" 101 | type: "ReLU" 102 | } 103 | layer { 104 | bottom: "conv2_1" 105 | top: "conv2_2" 106 | name: "conv2_2" 107 | type: "Convolution" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | bottom: "conv2_2" 124 | top: "conv2_2" 125 | name: "relu2_2" 126 | type: "ReLU" 127 | } 128 | layer { 129 | bottom: "conv2_2" 130 | top: "pool2" 131 | name: "pool2" 132 | type: "Pooling" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | bottom: "pool2" 141 | top: "conv3_1" 142 | name: "conv3_1" 143 | type: "Convolution" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | bottom: "conv3_1" 160 | top: "conv3_1" 161 | name: "relu3_1" 162 | type: "ReLU" 163 | } 164 | layer { 165 | bottom: "conv3_1" 166 | top: "conv3_2" 167 | name: "conv3_2" 168 | type: "Convolution" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | 
pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | bottom: "conv3_2" 185 | top: "conv3_2" 186 | name: "relu3_2" 187 | type: "ReLU" 188 | } 189 | layer { 190 | bottom: "conv3_2" 191 | top: "conv3_3" 192 | name: "conv3_3" 193 | type: "Convolution" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 | decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | bottom: "conv3_3" 210 | top: "conv3_3" 211 | name: "relu3_3" 212 | type: "ReLU" 213 | } 214 | layer { 215 | bottom: "conv3_3" 216 | top: "conv3_4" 217 | name: "conv3_4" 218 | type: "Convolution" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 0 226 | } 227 | convolution_param { 228 | num_output: 256 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layer { 234 | bottom: "conv3_4" 235 | top: "conv3_4" 236 | name: "relu3_4" 237 | type: "ReLU" 238 | } 239 | layer { 240 | bottom: "conv3_4" 241 | top: "pool3" 242 | name: "pool3" 243 | type: "Pooling" 244 | pooling_param { 245 | pool: MAX 246 | kernel_size: 2 247 | stride: 2 248 | } 249 | } 250 | layer { 251 | bottom: "pool3" 252 | top: "conv4_1" 253 | name: "conv4_1" 254 | type: "Convolution" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | bottom: "conv4_1" 271 | top: "conv4_1" 272 | name: "relu4_1" 273 | type: "ReLU" 274 | } 275 | layer { 276 | bottom: "conv4_1" 277 | top: "conv4_2" 278 | name: "conv4_2" 279 | type: "Convolution" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | bottom: "conv4_2" 296 | 
top: "conv4_2" 297 | name: "relu4_2" 298 | type: "ReLU" 299 | } 300 | layer { 301 | bottom: "conv4_2" 302 | top: "conv4_3" 303 | name: "conv4_3" 304 | type: "Convolution" 305 | param { 306 | lr_mult: 1 307 | decay_mult: 1 308 | } 309 | param { 310 | lr_mult: 2 311 | decay_mult: 0 312 | } 313 | convolution_param { 314 | num_output: 512 315 | pad: 1 316 | kernel_size: 3 317 | } 318 | } 319 | layer { 320 | bottom: "conv4_3" 321 | top: "conv4_3" 322 | name: "relu4_3" 323 | type: "ReLU" 324 | } 325 | layer { 326 | bottom: "conv4_3" 327 | top: "conv4_4" 328 | name: "conv4_4" 329 | type: "Convolution" 330 | param { 331 | lr_mult: 1 332 | decay_mult: 1 333 | } 334 | param { 335 | lr_mult: 2 336 | decay_mult: 0 337 | } 338 | convolution_param { 339 | num_output: 512 340 | pad: 1 341 | kernel_size: 3 342 | } 343 | } 344 | layer { 345 | bottom: "conv4_4" 346 | top: "conv4_4" 347 | name: "relu4_4" 348 | type: "ReLU" 349 | } 350 | layer { 351 | bottom: "conv4_4" 352 | top: "pool4" 353 | name: "pool4" 354 | type: "Pooling" 355 | pooling_param { 356 | pool: MAX 357 | kernel_size: 2 358 | stride: 2 359 | } 360 | } 361 | layer { 362 | bottom: "pool4" 363 | top: "conv5_1" 364 | name: "conv5_1" 365 | type: "Convolution" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | bottom: "conv5_1" 382 | top: "conv5_1" 383 | name: "relu5_1" 384 | type: "ReLU" 385 | } 386 | layer { 387 | bottom: "conv5_1" 388 | top: "conv5_2" 389 | name: "conv5_2" 390 | type: "Convolution" 391 | param { 392 | lr_mult: 1 393 | decay_mult: 1 394 | } 395 | param { 396 | lr_mult: 2 397 | decay_mult: 0 398 | } 399 | convolution_param { 400 | num_output: 512 401 | pad: 1 402 | kernel_size: 3 403 | } 404 | } 405 | layer { 406 | bottom: "conv5_2" 407 | top: "conv5_2" 408 | name: "relu5_2" 409 | type: "ReLU" 410 | } 411 | layer { 412 | 
bottom: "conv5_2" 413 | top: "conv5_3" 414 | name: "conv5_3" 415 | type: "Convolution" 416 | param { 417 | lr_mult: 1 418 | decay_mult: 1 419 | } 420 | param { 421 | lr_mult: 2 422 | decay_mult: 0 423 | } 424 | convolution_param { 425 | num_output: 512 426 | pad: 1 427 | kernel_size: 3 428 | } 429 | } 430 | layer { 431 | bottom: "conv5_3" 432 | top: "conv5_3" 433 | name: "relu5_3" 434 | type: "ReLU" 435 | } 436 | layer { 437 | bottom: "conv5_3" 438 | top: "conv5_4" 439 | name: "conv5_4" 440 | type: "Convolution" 441 | param { 442 | lr_mult: 1 443 | decay_mult: 1 444 | } 445 | param { 446 | lr_mult: 2 447 | decay_mult: 0 448 | } 449 | convolution_param { 450 | num_output: 512 451 | pad: 1 452 | kernel_size: 3 453 | } 454 | } 455 | layer { 456 | bottom: "conv5_4" 457 | top: "conv5_4" 458 | name: "relu5_4" 459 | type: "ReLU" 460 | } 461 | #========= RPN ============ 462 | 463 | layer { 464 | name: "rpn_conv/3x3" 465 | type: "Convolution" 466 | bottom: "conv5_4" 467 | top: "rpn/output" 468 | param { lr_mult: 1.0 decay_mult: 1.0 } 469 | param { lr_mult: 2.0 decay_mult: 0 } 470 | convolution_param { 471 | num_output: 512 472 | kernel_size: 3 pad: 1 stride: 1 473 | weight_filler { type: "gaussian" std: 0.01 } 474 | bias_filler { type: "constant" value: 0 } 475 | } 476 | } 477 | layer { 478 | name: "rpn_relu/3x3" 479 | type: "ReLU" 480 | bottom: "rpn/output" 481 | top: "rpn/output" 482 | } 483 | 484 | layer { 485 | name: "rpn_cls_score" 486 | type: "Convolution" 487 | bottom: "rpn/output" 488 | top: "rpn_cls_score" 489 | param { lr_mult: 1.0 decay_mult: 1.0 } 490 | param { lr_mult: 2.0 decay_mult: 0 } 491 | convolution_param { 492 | num_output: 18 # 2(bg/fg) * 9(anchors) 493 | kernel_size: 1 pad: 0 stride: 1 494 | weight_filler { type: "gaussian" std: 0.01 } 495 | bias_filler { type: "constant" value: 0 } 496 | } 497 | } 498 | layer { 499 | name: "rpn_bbox_pred" 500 | type: "Convolution" 501 | bottom: "rpn/output" 502 | top: "rpn_bbox_pred" 503 | param { lr_mult: 1.0 
decay_mult: 1.0 } 504 | param { lr_mult: 2.0 decay_mult: 0 } 505 | convolution_param { 506 | num_output: 36 # 4 * 9(anchors) 507 | kernel_size: 1 pad: 0 stride: 1 508 | weight_filler { type: "gaussian" std: 0.01 } 509 | bias_filler { type: "constant" value: 0 } 510 | } 511 | } 512 | layer { 513 | bottom: "rpn_cls_score" 514 | top: "rpn_cls_score_reshape" 515 | name: "rpn_cls_score_reshape" 516 | type: "Reshape" 517 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 518 | } 519 | 520 | #========= RoI Proposal ============ 521 | 522 | layer { 523 | name: "rpn_cls_prob" 524 | type: "Softmax" 525 | bottom: "rpn_cls_score_reshape" 526 | top: "rpn_cls_prob" 527 | } 528 | layer { 529 | name: 'rpn_cls_prob_reshape' 530 | type: 'Reshape' 531 | bottom: 'rpn_cls_prob' 532 | top: 'rpn_cls_prob_reshape' 533 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 534 | } 535 | layer { 536 | name: 'proposal' 537 | type: 'Python' 538 | bottom: 'rpn_cls_prob_reshape' 539 | bottom: 'rpn_bbox_pred' 540 | bottom: 'im_info' 541 | top: 'rois' 542 | python_param { 543 | module: 'rpn.proposal_layer' 544 | layer: 'ProposalLayer' 545 | param_str: "'feat_stride': 16" 546 | } 547 | } 548 | 549 | #========= RCNN ============ 550 | 551 | layer { 552 | name: "roi_pool5" 553 | type: "ROIPooling" 554 | bottom: "conv5_4" 555 | bottom: "rois" 556 | top: "pool5" 557 | roi_pooling_param { 558 | pooled_w: 7 559 | pooled_h: 7 560 | spatial_scale: 0.0625 # 1/16 561 | } 562 | } 563 | layer { 564 | name: "fc6" 565 | type: "InnerProduct" 566 | bottom: "pool5" 567 | top: "fc6" 568 | param { 569 | lr_mult: 1 570 | decay_mult: 1 571 | } 572 | param { 573 | lr_mult: 2 574 | decay_mult: 0 575 | } 576 | inner_product_param { 577 | num_output: 4096 578 | } 579 | } 580 | layer { 581 | name: "relu6" 582 | type: "ReLU" 583 | bottom: "fc6" 584 | top: "fc6" 585 | } 586 | layer { 587 | name: "drop6" 588 | type: "Dropout" 589 | bottom: "fc6" 590 | top: "fc6" 591 | dropout_param { 592 | dropout_ratio: 0.5 593 
| } 594 | } 595 | layer { 596 | name: "fc7" 597 | type: "InnerProduct" 598 | bottom: "fc6" 599 | top: "fc7" 600 | param { 601 | lr_mult: 1 602 | decay_mult: 1 603 | } 604 | param { 605 | lr_mult: 2 606 | decay_mult: 0 607 | } 608 | inner_product_param { 609 | num_output: 4096 610 | } 611 | } 612 | layer { 613 | name: "relu7" 614 | type: "ReLU" 615 | bottom: "fc7" 616 | top: "fc7" 617 | } 618 | layer { 619 | name: "drop7" 620 | type: "Dropout" 621 | bottom: "fc7" 622 | top: "fc7" 623 | dropout_param { 624 | dropout_ratio: 0.5 625 | } 626 | } 627 | layer { 628 | name: "cls_score2" 629 | type: "InnerProduct" 630 | bottom: "fc7" 631 | top: "cls_score2" 632 | param { 633 | lr_mult: 1 634 | decay_mult: 1 635 | } 636 | param { 637 | lr_mult: 2 638 | decay_mult: 0 639 | } 640 | inner_product_param { 641 | num_output: 6 642 | weight_filler { 643 | type: "gaussian" 644 | std: 0.01 645 | } 646 | bias_filler { 647 | type: "constant" 648 | value: 0 649 | } 650 | } 651 | } 652 | layer { 653 | name: "bbox_pred" 654 | type: "InnerProduct" 655 | bottom: "fc7" 656 | top: "bbox_pred" 657 | param { 658 | lr_mult: 1 659 | decay_mult: 1 660 | } 661 | param { 662 | lr_mult: 2 663 | decay_mult: 0 664 | } 665 | inner_product_param { 666 | num_output: 24 667 | weight_filler { 668 | type: "gaussian" 669 | std: 0.001 670 | } 671 | bias_filler { 672 | type: "constant" 673 | value: 0 674 | } 675 | } 676 | } 677 | layer { 678 | name: "cls_prob" 679 | type: "Softmax" 680 | bottom: "cls_score2" 681 | top: "cls_prob" 682 | } 683 | -------------------------------------------------------------------------------- /faster_rcnn_end2end_avs/VGG19/faster_rcnn_end2end/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_19_layers" 2 | layer { 3 | name: 'input-data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'im_info' 7 | top: 'gt_boxes' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: 
"'num_classes': 6" 12 | } 13 | } 14 | layer { 15 | bottom: "data" 16 | top: "conv1_1" 17 | name: "conv1_1" 18 | type: "Convolution" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 64 29 | pad: 1 30 | kernel_size: 3 31 | } 32 | } 33 | layer { 34 | bottom: "conv1_1" 35 | top: "conv1_1" 36 | name: "relu1_1" 37 | type: "ReLU" 38 | } 39 | layer { 40 | bottom: "conv1_1" 41 | top: "conv1_2" 42 | name: "conv1_2" 43 | type: "Convolution" 44 | param { 45 | lr_mult: 0 46 | decay_mult: 0 47 | } 48 | param { 49 | lr_mult: 0 50 | decay_mult: 0 51 | } 52 | convolution_param { 53 | num_output: 64 54 | pad: 1 55 | kernel_size: 3 56 | } 57 | } 58 | layer { 59 | bottom: "conv1_2" 60 | top: "conv1_2" 61 | name: "relu1_2" 62 | type: "ReLU" 63 | } 64 | layer { 65 | bottom: "conv1_2" 66 | top: "pool1" 67 | name: "pool1" 68 | type: "Pooling" 69 | pooling_param { 70 | pool: MAX 71 | kernel_size: 2 72 | stride: 2 73 | } 74 | } 75 | layer { 76 | bottom: "pool1" 77 | top: "conv2_1" 78 | name: "conv2_1" 79 | type: "Convolution" 80 | param { 81 | lr_mult: 0 82 | decay_mult: 0 83 | } 84 | param { 85 | lr_mult: 0 86 | decay_mult: 0 87 | } 88 | convolution_param { 89 | num_output: 128 90 | pad: 1 91 | kernel_size: 3 92 | } 93 | } 94 | layer { 95 | bottom: "conv2_1" 96 | top: "conv2_1" 97 | name: "relu2_1" 98 | type: "ReLU" 99 | } 100 | layer { 101 | bottom: "conv2_1" 102 | top: "conv2_2" 103 | name: "conv2_2" 104 | type: "Convolution" 105 | param { 106 | lr_mult: 0 107 | decay_mult: 0 108 | } 109 | param { 110 | lr_mult: 0 111 | decay_mult: 0 112 | } 113 | convolution_param { 114 | num_output: 128 115 | pad: 1 116 | kernel_size: 3 117 | } 118 | } 119 | layer { 120 | bottom: "conv2_2" 121 | top: "conv2_2" 122 | name: "relu2_2" 123 | type: "ReLU" 124 | } 125 | layer { 126 | bottom: "conv2_2" 127 | top: "pool2" 128 | name: "pool2" 129 | type: "Pooling" 130 | pooling_param { 131 | pool: MAX 132 | 
kernel_size: 2 133 | stride: 2 134 | } 135 | } 136 | layer { 137 | bottom: "pool2" 138 | top: "conv3_1" 139 | name: "conv3_1" 140 | type: "Convolution" 141 | param { 142 | lr_mult: 1 143 | } 144 | param { 145 | lr_mult: 2 146 | } 147 | convolution_param { 148 | num_output: 256 149 | pad: 1 150 | kernel_size: 3 151 | } 152 | } 153 | layer { 154 | bottom: "conv3_1" 155 | top: "conv3_1" 156 | name: "relu3_1" 157 | type: "ReLU" 158 | } 159 | layer { 160 | bottom: "conv3_1" 161 | top: "conv3_2" 162 | name: "conv3_2" 163 | type: "Convolution" 164 | param { 165 | lr_mult: 1 166 | } 167 | param { 168 | lr_mult: 2 169 | } 170 | convolution_param { 171 | num_output: 256 172 | pad: 1 173 | kernel_size: 3 174 | } 175 | } 176 | layer { 177 | bottom: "conv3_2" 178 | top: "conv3_2" 179 | name: "relu3_2" 180 | type: "ReLU" 181 | } 182 | layer { 183 | bottom: "conv3_2" 184 | top: "conv3_3" 185 | name: "conv3_3" 186 | type: "Convolution" 187 | param { 188 | lr_mult: 1 189 | } 190 | param { 191 | lr_mult: 2 192 | } 193 | convolution_param { 194 | num_output: 256 195 | pad: 1 196 | kernel_size: 3 197 | } 198 | } 199 | layer { 200 | bottom: "conv3_3" 201 | top: "conv3_3" 202 | name: "relu3_3" 203 | type: "ReLU" 204 | } 205 | layer { 206 | bottom: "conv3_3" 207 | top: "conv3_4" 208 | name: "conv3_4" 209 | type: "Convolution" 210 | param { 211 | lr_mult: 1 212 | } 213 | param { 214 | lr_mult: 2 215 | } 216 | convolution_param { 217 | num_output: 256 218 | pad: 1 219 | kernel_size: 3 220 | } 221 | } 222 | layer { 223 | bottom: "conv3_4" 224 | top: "conv3_4" 225 | name: "relu3_4" 226 | type: "ReLU" 227 | } 228 | layer { 229 | bottom: "conv3_4" 230 | top: "pool3" 231 | name: "pool3" 232 | type: "Pooling" 233 | pooling_param { 234 | pool: MAX 235 | kernel_size: 2 236 | stride: 2 237 | } 238 | } 239 | layer { 240 | bottom: "pool3" 241 | top: "conv4_1" 242 | name: "conv4_1" 243 | type: "Convolution" 244 | param { 245 | lr_mult: 1 246 | } 247 | param { 248 | lr_mult: 2 249 | } 250 | 
convolution_param { 251 | num_output: 512 252 | pad: 1 253 | kernel_size: 3 254 | } 255 | } 256 | layer { 257 | bottom: "conv4_1" 258 | top: "conv4_1" 259 | name: "relu4_1" 260 | type: "ReLU" 261 | } 262 | layer { 263 | bottom: "conv4_1" 264 | top: "conv4_2" 265 | name: "conv4_2" 266 | type: "Convolution" 267 | param { 268 | lr_mult: 1 269 | } 270 | param { 271 | lr_mult: 2 272 | } 273 | convolution_param { 274 | num_output: 512 275 | pad: 1 276 | kernel_size: 3 277 | } 278 | } 279 | layer { 280 | bottom: "conv4_2" 281 | top: "conv4_2" 282 | name: "relu4_2" 283 | type: "ReLU" 284 | } 285 | layer { 286 | bottom: "conv4_2" 287 | top: "conv4_3" 288 | name: "conv4_3" 289 | type: "Convolution" 290 | param { 291 | lr_mult: 1 292 | } 293 | param { 294 | lr_mult: 2 295 | } 296 | convolution_param { 297 | num_output: 512 298 | pad: 1 299 | kernel_size: 3 300 | } 301 | } 302 | layer { 303 | bottom: "conv4_3" 304 | top: "conv4_3" 305 | name: "relu4_3" 306 | type: "ReLU" 307 | } 308 | layer { 309 | bottom: "conv4_3" 310 | top: "conv4_4" 311 | name: "conv4_4" 312 | type: "Convolution" 313 | param { 314 | lr_mult: 1 315 | } 316 | param { 317 | lr_mult: 2 318 | } 319 | convolution_param { 320 | num_output: 512 321 | pad: 1 322 | kernel_size: 3 323 | } 324 | } 325 | layer { 326 | bottom: "conv4_4" 327 | top: "conv4_4" 328 | name: "relu4_4" 329 | type: "ReLU" 330 | } 331 | layer { 332 | bottom: "conv4_4" 333 | top: "pool4" 334 | name: "pool4" 335 | type: "Pooling" 336 | pooling_param { 337 | pool: MAX 338 | kernel_size: 2 339 | stride: 2 340 | } 341 | } 342 | layer { 343 | bottom: "pool4" 344 | top: "conv5_1" 345 | name: "conv5_1" 346 | type: "Convolution" 347 | param { 348 | lr_mult: 1 349 | } 350 | param { 351 | lr_mult: 2 352 | } 353 | convolution_param { 354 | num_output: 512 355 | pad: 1 356 | kernel_size: 3 357 | } 358 | } 359 | layer { 360 | bottom: "conv5_1" 361 | top: "conv5_1" 362 | name: "relu5_1" 363 | type: "ReLU" 364 | } 365 | layer { 366 | bottom: "conv5_1" 367 | 
top: "conv5_2" 368 | name: "conv5_2" 369 | type: "Convolution" 370 | param { 371 | lr_mult: 1 372 | } 373 | param { 374 | lr_mult: 2 375 | } 376 | convolution_param { 377 | num_output: 512 378 | pad: 1 379 | kernel_size: 3 380 | } 381 | } 382 | layer { 383 | bottom: "conv5_2" 384 | top: "conv5_2" 385 | name: "relu5_2" 386 | type: "ReLU" 387 | } 388 | layer { 389 | bottom: "conv5_2" 390 | top: "conv5_3" 391 | name: "conv5_3" 392 | type: "Convolution" 393 | param { 394 | lr_mult: 1 395 | } 396 | param { 397 | lr_mult: 2 398 | } 399 | convolution_param { 400 | num_output: 512 401 | pad: 1 402 | kernel_size: 3 403 | } 404 | } 405 | layer { 406 | bottom: "conv5_3" 407 | top: "conv5_3" 408 | name: "relu5_3" 409 | type: "ReLU" 410 | } 411 | layer { 412 | bottom: "conv5_3" 413 | top: "conv5_4" 414 | name: "conv5_4" 415 | type: "Convolution" 416 | param { 417 | lr_mult: 1 418 | } 419 | param { 420 | lr_mult: 2 421 | } 422 | convolution_param { 423 | num_output: 512 424 | pad: 1 425 | kernel_size: 3 426 | } 427 | } 428 | layer { 429 | bottom: "conv5_4" 430 | top: "conv5_4" 431 | name: "relu5_4" 432 | type: "ReLU" 433 | } 434 | #========= RPN ============ 435 | 436 | layer { 437 | name: "rpn_conv/3x3" 438 | type: "Convolution" 439 | bottom: "conv5_4" 440 | top: "rpn/output" 441 | param { lr_mult: 1.0 } 442 | param { lr_mult: 2.0 } 443 | convolution_param { 444 | num_output: 512 445 | kernel_size: 3 pad: 1 stride: 1 446 | weight_filler { type: "gaussian" std: 0.01 } 447 | bias_filler { type: "constant" value: 0 } 448 | } 449 | } 450 | layer { 451 | name: "rpn_relu/3x3" 452 | type: "ReLU" 453 | bottom: "rpn/output" 454 | top: "rpn/output" 455 | } 456 | 457 | layer { 458 | name: "rpn_cls_score" 459 | type: "Convolution" 460 | bottom: "rpn/output" 461 | top: "rpn_cls_score" 462 | param { lr_mult: 1.0 } 463 | param { lr_mult: 2.0 } 464 | convolution_param { 465 | num_output: 18 # 2(bg/fg) * 9(anchors) 466 | kernel_size: 1 pad: 0 stride: 1 467 | weight_filler { type: "gaussian" 
std: 0.01 } 468 | bias_filler { type: "constant" value: 0 } 469 | } 470 | } 471 | 472 | layer { 473 | name: "rpn_bbox_pred" 474 | type: "Convolution" 475 | bottom: "rpn/output" 476 | top: "rpn_bbox_pred" 477 | param { lr_mult: 1.0 } 478 | param { lr_mult: 2.0 } 479 | convolution_param { 480 | num_output: 36 # 4 * 9(anchors) 481 | kernel_size: 1 pad: 0 stride: 1 482 | weight_filler { type: "gaussian" std: 0.01 } 483 | bias_filler { type: "constant" value: 0 } 484 | } 485 | } 486 | 487 | layer { 488 | bottom: "rpn_cls_score" 489 | top: "rpn_cls_score_reshape" 490 | name: "rpn_cls_score_reshape" 491 | type: "Reshape" 492 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 493 | } 494 | 495 | layer { 496 | name: 'rpn-data' 497 | type: 'Python' 498 | bottom: 'rpn_cls_score' 499 | bottom: 'gt_boxes' 500 | bottom: 'im_info' 501 | bottom: 'data' 502 | top: 'rpn_labels' 503 | top: 'rpn_bbox_targets' 504 | top: 'rpn_bbox_inside_weights' 505 | top: 'rpn_bbox_outside_weights' 506 | python_param { 507 | module: 'rpn.anchor_target_layer' 508 | layer: 'AnchorTargetLayer' 509 | param_str: "'feat_stride': 16" 510 | } 511 | } 512 | 513 | layer { 514 | name: "rpn_loss_cls" 515 | type: "SoftmaxWithLoss" 516 | bottom: "rpn_cls_score_reshape" 517 | bottom: "rpn_labels" 518 | propagate_down: 1 519 | propagate_down: 0 520 | top: "rpn_cls_loss" 521 | loss_weight: 1 522 | loss_param { 523 | ignore_label: -1 524 | normalize: true 525 | } 526 | } 527 | 528 | layer { 529 | name: "rpn_loss_bbox" 530 | type: "SmoothL1Loss" 531 | bottom: "rpn_bbox_pred" 532 | bottom: "rpn_bbox_targets" 533 | bottom: 'rpn_bbox_inside_weights' 534 | bottom: 'rpn_bbox_outside_weights' 535 | top: "rpn_loss_bbox" 536 | loss_weight: 1 537 | smooth_l1_loss_param { sigma: 3.0 } 538 | } 539 | 540 | #========= RoI Proposal ============ 541 | 542 | layer { 543 | name: "rpn_cls_prob" 544 | type: "Softmax" 545 | bottom: "rpn_cls_score_reshape" 546 | top: "rpn_cls_prob" 547 | } 548 | 549 | layer { 550 | name: 
'rpn_cls_prob_reshape' 551 | type: 'Reshape' 552 | bottom: 'rpn_cls_prob' 553 | top: 'rpn_cls_prob_reshape' 554 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 555 | } 556 | 557 | layer { 558 | name: 'proposal' 559 | type: 'Python' 560 | bottom: 'rpn_cls_prob_reshape' 561 | bottom: 'rpn_bbox_pred' 562 | bottom: 'im_info' 563 | top: 'rpn_rois' 564 | # top: 'rpn_scores' 565 | python_param { 566 | module: 'rpn.proposal_layer' 567 | layer: 'ProposalLayer' 568 | param_str: "'feat_stride': 16" 569 | } 570 | } 571 | 572 | #layer { 573 | # name: 'debug-data' 574 | # type: 'Python' 575 | # bottom: 'data' 576 | # bottom: 'rpn_rois' 577 | # bottom: 'rpn_scores' 578 | # python_param { 579 | # module: 'rpn.debug_layer' 580 | # layer: 'RPNDebugLayer' 581 | # } 582 | #} 583 | 584 | layer { 585 | name: 'roi-data' 586 | type: 'Python' 587 | bottom: 'rpn_rois' 588 | bottom: 'gt_boxes' 589 | top: 'rois' 590 | top: 'labels' 591 | top: 'bbox_targets' 592 | top: 'bbox_inside_weights' 593 | top: 'bbox_outside_weights' 594 | python_param { 595 | module: 'rpn.proposal_target_layer' 596 | layer: 'ProposalTargetLayer' 597 | param_str: "'num_classes': 6" 598 | } 599 | } 600 | 601 | #========= RCNN ============ 602 | 603 | layer { 604 | name: "roi_pool5" 605 | type: "ROIPooling" 606 | bottom: "conv5_4" 607 | bottom: "rois" 608 | top: "pool5" 609 | roi_pooling_param { 610 | pooled_w: 7 611 | pooled_h: 7 612 | spatial_scale: 0.0625 # 1/16 613 | } 614 | } 615 | layer { 616 | name: "fc6" 617 | type: "InnerProduct" 618 | bottom: "pool5" 619 | top: "fc6" 620 | param { 621 | lr_mult: 1 622 | } 623 | param { 624 | lr_mult: 2 625 | } 626 | inner_product_param { 627 | num_output: 4096 628 | } 629 | } 630 | layer { 631 | name: "relu6" 632 | type: "ReLU" 633 | bottom: "fc6" 634 | top: "fc6" 635 | } 636 | layer { 637 | name: "drop6" 638 | type: "Dropout" 639 | bottom: "fc6" 640 | top: "fc6" 641 | dropout_param { 642 | dropout_ratio: 0.5 643 | } 644 | } 645 | layer { 646 | name: "fc7" 647 | 
type: "InnerProduct" 648 | bottom: "fc6" 649 | top: "fc7" 650 | param { 651 | lr_mult: 1 652 | } 653 | param { 654 | lr_mult: 2 655 | } 656 | inner_product_param { 657 | num_output: 4096 658 | } 659 | } 660 | layer { 661 | name: "relu7" 662 | type: "ReLU" 663 | bottom: "fc7" 664 | top: "fc7" 665 | } 666 | layer { 667 | name: "drop7" 668 | type: "Dropout" 669 | bottom: "fc7" 670 | top: "fc7" 671 | dropout_param { 672 | dropout_ratio: 0.5 673 | } 674 | } 675 | layer { 676 | name: "cls_score2" 677 | type: "InnerProduct" 678 | bottom: "fc7" 679 | top: "cls_score2" 680 | param { 681 | lr_mult: 1 682 | } 683 | param { 684 | lr_mult: 2 685 | } 686 | inner_product_param { 687 | num_output: 6 688 | weight_filler { 689 | type: "gaussian" 690 | std: 0.01 691 | } 692 | bias_filler { 693 | type: "constant" 694 | value: 0 695 | } 696 | } 697 | } 698 | layer { 699 | name: "bbox_pred2" 700 | type: "InnerProduct" 701 | bottom: "fc7" 702 | top: "bbox_pred2" 703 | param { 704 | lr_mult: 1 705 | } 706 | param { 707 | lr_mult: 2 708 | } 709 | inner_product_param { 710 | num_output: 24 711 | weight_filler { 712 | type: "gaussian" 713 | std: 0.001 714 | } 715 | bias_filler { 716 | type: "constant" 717 | value: 0 718 | } 719 | } 720 | } 721 | layer { 722 | name: "loss_cls" 723 | type: "SoftmaxWithLoss" 724 | bottom: "cls_score2" 725 | bottom: "labels" 726 | propagate_down: 1 727 | propagate_down: 0 728 | top: "loss_cls" 729 | loss_weight: 1 730 | } 731 | layer { 732 | name: "loss_bbox" 733 | type: "SmoothL1Loss" 734 | bottom: "bbox_pred2" 735 | bottom: "bbox_targets" 736 | bottom: "bbox_inside_weights" 737 | bottom: "bbox_outside_weights" 738 | top: "loss_bbox" 739 | loss_weight: 1 740 | } 741 | -------------------------------------------------------------------------------- /faster_rcnn_end2end_avs/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: 
"../../models/intel_optimized_models/faster-rcnn/pascal_voc/VGG16/faster_rcnn_end2end_avs/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 50000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_faster_rcnn" 16 | iter_size: 2 17 | -------------------------------------------------------------------------------- /faster_rcnn_end2end_avs/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | 3 | input: "data" 4 | input_shape { 5 | dim: 1 6 | dim: 3 7 | dim: 224 8 | dim: 224 9 | } 10 | 11 | input: "im_info" 12 | input_shape { 13 | dim: 1 14 | dim: 3 15 | } 16 | 17 | layer { 18 | name: "conv1_1" 19 | type: "Convolution" 20 | bottom: "data" 21 | top: "conv1_1" 22 | param { 23 | lr_mult: 0 24 | decay_mult: 0 25 | } 26 | param { 27 | lr_mult: 0 28 | decay_mult: 0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layer { 37 | name: "relu1_1" 38 | type: "ReLU" 39 | bottom: "conv1_1" 40 | top: "conv1_1" 41 | } 42 | layer { 43 | name: "conv1_2" 44 | type: "Convolution" 45 | bottom: "conv1_1" 46 | top: "conv1_2" 47 | param { 48 | lr_mult: 0 49 | decay_mult: 0 50 | } 51 | param { 52 | lr_mult: 0 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | pad: 1 58 | kernel_size: 3 59 | } 60 | } 61 | layer { 62 | name: "relu1_2" 63 | type: "ReLU" 64 | bottom: "conv1_2" 65 | top: "conv1_2" 66 | } 67 | layer { 68 | name: "pool1" 69 | type: "Pooling" 70 | bottom: "conv1_2" 71 | top: "pool1" 72 | pooling_param { 73 | pool: MAX 74 | kernel_size: 2 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv2_1" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: 
"conv2_1" 83 | param { 84 | lr_mult: 0 85 | decay_mult: 0 86 | } 87 | param { 88 | lr_mult: 0 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 128 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layer { 98 | name: "relu2_1" 99 | type: "ReLU" 100 | bottom: "conv2_1" 101 | top: "conv2_1" 102 | } 103 | layer { 104 | name: "conv2_2" 105 | type: "Convolution" 106 | bottom: "conv2_1" 107 | top: "conv2_2" 108 | param { 109 | lr_mult: 0 110 | decay_mult: 0 111 | } 112 | param { 113 | lr_mult: 0 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu2_2" 124 | type: "ReLU" 125 | bottom: "conv2_2" 126 | top: "conv2_2" 127 | } 128 | layer { 129 | name: "pool2" 130 | type: "Pooling" 131 | bottom: "conv2_2" 132 | top: "pool2" 133 | pooling_param { 134 | pool: MAX 135 | kernel_size: 2 136 | stride: 2 137 | } 138 | } 139 | layer { 140 | name: "conv3_1" 141 | type: "Convolution" 142 | bottom: "pool2" 143 | top: "conv3_1" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 256 154 | pad: 1 155 | kernel_size: 3 156 | } 157 | } 158 | layer { 159 | name: "relu3_1" 160 | type: "ReLU" 161 | bottom: "conv3_1" 162 | top: "conv3_1" 163 | } 164 | layer { 165 | name: "conv3_2" 166 | type: "Convolution" 167 | bottom: "conv3_1" 168 | top: "conv3_2" 169 | param { 170 | lr_mult: 1 171 | decay_mult: 1 172 | } 173 | param { 174 | lr_mult: 2 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu3_2" 185 | type: "ReLU" 186 | bottom: "conv3_2" 187 | top: "conv3_2" 188 | } 189 | layer { 190 | name: "conv3_3" 191 | type: "Convolution" 192 | bottom: "conv3_2" 193 | top: "conv3_3" 194 | param { 195 | lr_mult: 1 196 | decay_mult: 1 197 | } 198 | param { 199 | lr_mult: 2 200 
| decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 256 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu3_3" 210 | type: "ReLU" 211 | bottom: "conv3_3" 212 | top: "conv3_3" 213 | } 214 | layer { 215 | name: "pool3" 216 | type: "Pooling" 217 | bottom: "conv3_3" 218 | top: "pool3" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 2 222 | stride: 2 223 | } 224 | } 225 | layer { 226 | name: "conv4_1" 227 | type: "Convolution" 228 | bottom: "pool3" 229 | top: "conv4_1" 230 | param { 231 | lr_mult: 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 512 240 | pad: 1 241 | kernel_size: 3 242 | } 243 | } 244 | layer { 245 | name: "relu4_1" 246 | type: "ReLU" 247 | bottom: "conv4_1" 248 | top: "conv4_1" 249 | } 250 | layer { 251 | name: "conv4_2" 252 | type: "Convolution" 253 | bottom: "conv4_1" 254 | top: "conv4_2" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: 512 265 | pad: 1 266 | kernel_size: 3 267 | } 268 | } 269 | layer { 270 | name: "relu4_2" 271 | type: "ReLU" 272 | bottom: "conv4_2" 273 | top: "conv4_2" 274 | } 275 | layer { 276 | name: "conv4_3" 277 | type: "Convolution" 278 | bottom: "conv4_2" 279 | top: "conv4_3" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | convolution_param { 289 | num_output: 512 290 | pad: 1 291 | kernel_size: 3 292 | } 293 | } 294 | layer { 295 | name: "relu4_3" 296 | type: "ReLU" 297 | bottom: "conv4_3" 298 | top: "conv4_3" 299 | } 300 | layer { 301 | name: "pool4" 302 | type: "Pooling" 303 | bottom: "conv4_3" 304 | top: "pool4" 305 | pooling_param { 306 | pool: MAX 307 | kernel_size: 2 308 | stride: 2 309 | } 310 | } 311 | layer { 312 | name: "conv5_1" 313 | type: "Convolution" 314 | bottom: "pool4" 315 | top: 
"conv5_1" 316 | param { 317 | lr_mult: 1 318 | decay_mult: 1 319 | } 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | convolution_param { 325 | num_output: 512 326 | pad: 1 327 | kernel_size: 3 328 | } 329 | } 330 | layer { 331 | name: "relu5_1" 332 | type: "ReLU" 333 | bottom: "conv5_1" 334 | top: "conv5_1" 335 | } 336 | layer { 337 | name: "conv5_2" 338 | type: "Convolution" 339 | bottom: "conv5_1" 340 | top: "conv5_2" 341 | param { 342 | lr_mult: 1 343 | decay_mult: 1 344 | } 345 | param { 346 | lr_mult: 2 347 | decay_mult: 0 348 | } 349 | convolution_param { 350 | num_output: 512 351 | pad: 1 352 | kernel_size: 3 353 | } 354 | } 355 | layer { 356 | name: "relu5_2" 357 | type: "ReLU" 358 | bottom: "conv5_2" 359 | top: "conv5_2" 360 | } 361 | layer { 362 | name: "conv5_3" 363 | type: "Convolution" 364 | bottom: "conv5_2" 365 | top: "conv5_3" 366 | param { 367 | lr_mult: 1 368 | decay_mult: 1 369 | } 370 | param { 371 | lr_mult: 2 372 | decay_mult: 0 373 | } 374 | convolution_param { 375 | num_output: 512 376 | pad: 1 377 | kernel_size: 3 378 | } 379 | } 380 | layer { 381 | name: "relu5_3" 382 | type: "ReLU" 383 | bottom: "conv5_3" 384 | top: "conv5_3" 385 | } 386 | 387 | #========= RPN ============ 388 | 389 | layer { 390 | name: "rpn_conv/3x3" 391 | type: "Convolution" 392 | bottom: "conv5_3" 393 | top: "rpn/output" 394 | param { lr_mult: 1.0 decay_mult: 1.0 } 395 | param { lr_mult: 2.0 decay_mult: 0 } 396 | convolution_param { 397 | num_output: 512 398 | kernel_size: 3 pad: 1 stride: 1 399 | weight_filler { type: "gaussian" std: 0.01 } 400 | bias_filler { type: "constant" value: 0 } 401 | } 402 | } 403 | layer { 404 | name: "rpn_relu/3x3" 405 | type: "ReLU" 406 | bottom: "rpn/output" 407 | top: "rpn/output" 408 | } 409 | 410 | layer { 411 | name: "rpn_cls_score" 412 | type: "Convolution" 413 | bottom: "rpn/output" 414 | top: "rpn_cls_score" 415 | param { lr_mult: 1.0 decay_mult: 1.0 } 416 | param { lr_mult: 2.0 decay_mult: 0 } 417 | 
convolution_param { 418 | num_output: 18 # 2(bg/fg) * 9(anchors) 419 | kernel_size: 1 pad: 0 stride: 1 420 | weight_filler { type: "gaussian" std: 0.01 } 421 | bias_filler { type: "constant" value: 0 } 422 | } 423 | } 424 | layer { 425 | name: "rpn_bbox_pred" 426 | type: "Convolution" 427 | bottom: "rpn/output" 428 | top: "rpn_bbox_pred" 429 | param { lr_mult: 1.0 decay_mult: 1.0 } 430 | param { lr_mult: 2.0 decay_mult: 0 } 431 | convolution_param { 432 | num_output: 36 # 4 * 9(anchors) 433 | kernel_size: 1 pad: 0 stride: 1 434 | weight_filler { type: "gaussian" std: 0.01 } 435 | bias_filler { type: "constant" value: 0 } 436 | } 437 | } 438 | layer { 439 | bottom: "rpn_cls_score" 440 | top: "rpn_cls_score_reshape" 441 | name: "rpn_cls_score_reshape" 442 | type: "Reshape" 443 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 444 | } 445 | 446 | #========= RoI Proposal ============ 447 | 448 | layer { 449 | name: "rpn_cls_prob" 450 | type: "Softmax" 451 | bottom: "rpn_cls_score_reshape" 452 | top: "rpn_cls_prob" 453 | } 454 | layer { 455 | name: 'rpn_cls_prob_reshape' 456 | type: 'Reshape' 457 | bottom: 'rpn_cls_prob' 458 | top: 'rpn_cls_prob_reshape' 459 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 460 | } 461 | layer { 462 | name: 'proposal' 463 | type: 'Python' 464 | bottom: 'rpn_cls_prob_reshape' 465 | bottom: 'rpn_bbox_pred' 466 | bottom: 'im_info' 467 | top: 'rois' 468 | python_param { 469 | module: 'rpn.proposal_layer' 470 | layer: 'ProposalLayer' 471 | param_str: "'feat_stride': 16" 472 | } 473 | } 474 | 475 | #========= RCNN ============ 476 | 477 | layer { 478 | name: "roi_pool5" 479 | type: "ROIPooling" 480 | bottom: "conv5_3" 481 | bottom: "rois" 482 | top: "pool5" 483 | roi_pooling_param { 484 | pooled_w: 7 485 | pooled_h: 7 486 | spatial_scale: 0.0625 # 1/16 487 | } 488 | } 489 | layer { 490 | name: "fc6" 491 | type: "InnerProduct" 492 | bottom: "pool5" 493 | top: "fc6" 494 | param { 495 | lr_mult: 1 496 | decay_mult: 1 497 | } 
498 | param { 499 | lr_mult: 2 500 | decay_mult: 0 501 | } 502 | inner_product_param { 503 | num_output: 4096 504 | } 505 | } 506 | layer { 507 | name: "relu6" 508 | type: "ReLU" 509 | bottom: "fc6" 510 | top: "fc6" 511 | } 512 | layer { 513 | name: "drop6" 514 | type: "Dropout" 515 | bottom: "fc6" 516 | top: "fc6" 517 | dropout_param { 518 | dropout_ratio: 0.5 519 | } 520 | } 521 | layer { 522 | name: "fc7" 523 | type: "InnerProduct" 524 | bottom: "fc6" 525 | top: "fc7" 526 | param { 527 | lr_mult: 1 528 | decay_mult: 1 529 | } 530 | param { 531 | lr_mult: 2 532 | decay_mult: 0 533 | } 534 | inner_product_param { 535 | num_output: 4096 536 | } 537 | } 538 | layer { 539 | name: "relu7" 540 | type: "ReLU" 541 | bottom: "fc7" 542 | top: "fc7" 543 | } 544 | layer { 545 | name: "drop7" 546 | type: "Dropout" 547 | bottom: "fc7" 548 | top: "fc7" 549 | dropout_param { 550 | dropout_ratio: 0.5 551 | } 552 | } 553 | layer { 554 | name: "cls_score" 555 | type: "InnerProduct" 556 | bottom: "fc7" 557 | top: "cls_score" 558 | param { 559 | lr_mult: 1 560 | decay_mult: 1 561 | } 562 | param { 563 | lr_mult: 2 564 | decay_mult: 0 565 | } 566 | inner_product_param { 567 | num_output: 21 568 | weight_filler { 569 | type: "gaussian" 570 | std: 0.01 571 | } 572 | bias_filler { 573 | type: "constant" 574 | value: 0 575 | } 576 | } 577 | } 578 | layer { 579 | name: "bbox_pred" 580 | type: "InnerProduct" 581 | bottom: "fc7" 582 | top: "bbox_pred" 583 | param { 584 | lr_mult: 1 585 | decay_mult: 1 586 | } 587 | param { 588 | lr_mult: 2 589 | decay_mult: 0 590 | } 591 | inner_product_param { 592 | num_output: 84 593 | weight_filler { 594 | type: "gaussian" 595 | std: 0.001 596 | } 597 | bias_filler { 598 | type: "constant" 599 | value: 0 600 | } 601 | } 602 | } 603 | layer { 604 | name: "cls_prob" 605 | type: "Softmax" 606 | bottom: "cls_score" 607 | top: "cls_prob" 608 | } 609 | -------------------------------------------------------------------------------- 
/faster_rcnn_end2end_avs/train.prototxt:
--------------------------------------------------------------------------------
# Faster R-CNN end-to-end TRAINING network on a VGG-16 style backbone.
#
# Data flow, as wired by the bottom/top names below:
#   input-data -> conv1_1 .. conv5_3 -> RPN heads (rpn_cls_score, rpn_bbox_pred)
#   -> proposal -> roi-data -> roi_pool5 -> fc6 -> fc7 -> cls_score / bbox_pred
#
# Four losses are produced: rpn_cls_loss, rpn_loss_bbox, loss_cls, loss_bbox.
#
# Class count: this file is configured for 6 classes. Four values have to
# agree with each other:
#   * input-data  param_str 'num_classes'  = 6
#   * roi-data    param_str 'num_classes'  = 6
#   * cls_score   num_output               = 6
#   * bbox_pred   num_output               = 24  (4 box coordinates * 6 classes)
# NOTE(review): whether the 6 includes a background class depends on the
# dataset definition, which is not part of this file - confirm.
# NOTE(review): any deploy/test prototxt that loads weights trained with this
# file needs the same cls_score / bbox_pred sizes - confirm.
name: "VGG_ILSVRC_16_layers"

# Python data layer: emits the image blob, image info and ground-truth boxes.
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'im_info'
  top: 'gt_boxes'
  python_param {
    module: 'roi_data_layer.layer'
    layer: 'RoIDataLayer'
    param_str: "'num_classes': 6"
  }
}

#========= Backbone: conv1 / conv2 ============
# Both param blocks (weights, biases) of conv1_1 .. conv2_2 use lr_mult: 0 and
# decay_mult: 0, so the solver leaves these layers unchanged during training.

layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu1_1"
  type: "ReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu1_2"
  type: "ReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 0
    decay_mult: 0
  }
  param {
    lr_mult: 0
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu2_2"
  type: "ReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

#========= Backbone: conv3 / conv4 / conv5 ============
# From conv3_1 onwards the layers are trainable: lr_mult 1 for the first param
# block (weights) and lr_mult 2 for the second (biases).

layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu3_1"
  type: "ReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu3_2"
  type: "ReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu3_3"
  type: "ReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu4_1"
  type: "ReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu4_2"
  type: "ReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu4_3"
  type: "ReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu5_1"
  type: "ReLU"
  bottom: "conv5_1"
  top: "conv5_1"
}
layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu5_2"
  type: "ReLU"
  bottom: "conv5_2"
  top: "conv5_2"
}
layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
  }
}
layer {
  name: "relu5_3"
  type: "ReLU"
  bottom: "conv5_3"
  top: "conv5_3"
}

#========= RPN ============
# A 3x3 convolution over conv5_3 feeds two sibling 1x1 heads: objectness
# scores (rpn_cls_score) and box regression (rpn_bbox_pred).

layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv5_3"
  top: "rpn/output"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 512
    kernel_size: 3 pad: 1 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "rpn_relu/3x3"
  type: "ReLU"
  bottom: "rpn/output"
  top: "rpn/output"
}

layer {
  name: "rpn_cls_score"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_cls_score"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 18   # 2(bg/fg) * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

layer {
  name: "rpn_bbox_pred"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_bbox_pred"
  param { lr_mult: 1.0 }
  param { lr_mult: 2.0 }
  convolution_param {
    num_output: 36   # 4 * 9(anchors)
    kernel_size: 1 pad: 0 stride: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}

# Fold the 18 score channels into 2 channels so the softmax below runs over
# the bg/fg pair.
layer {
  bottom: "rpn_cls_score"
  top: "rpn_cls_score_reshape"
  name: "rpn_cls_score_reshape"
  type: "Reshape"
  reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

# Python layer that produces the RPN training targets (labels, box targets and
# the two weight blobs) consumed by the RPN losses below.
layer {
  name: 'rpn-data'
  type: 'Python'
  bottom: 'rpn_cls_score'
  bottom: 'gt_boxes'
  bottom: 'im_info'
  bottom: 'data'
  top: 'rpn_labels'
  top: 'rpn_bbox_targets'
  top: 'rpn_bbox_inside_weights'
  top: 'rpn_bbox_outside_weights'
  python_param {
    module: 'rpn.anchor_target_layer'
    layer: 'AnchorTargetLayer'
    param_str: "'feat_stride': 16"
  }
}

# RPN objectness loss. Entries labelled -1 are ignored; gradients flow only
# to the score blob (propagate_down 1), never to the labels (propagate_down 0).
layer {
  name: "rpn_loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "rpn_cls_score_reshape"
  bottom: "rpn_labels"
  propagate_down: 1
  propagate_down: 0
  top: "rpn_cls_loss"
  loss_weight: 1
  loss_param {
    ignore_label: -1
    normalize: true
  }
}

# RPN box-regression loss.
layer {
  name: "rpn_loss_bbox"
  type: "SmoothL1Loss"
  bottom: "rpn_bbox_pred"
  bottom: "rpn_bbox_targets"
  bottom: 'rpn_bbox_inside_weights'
  bottom: 'rpn_bbox_outside_weights'
  top: "rpn_loss_bbox"
  loss_weight: 1
  smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
  name: "rpn_cls_prob"
  type: "Softmax"
  bottom: "rpn_cls_score_reshape"
  top: "rpn_cls_prob"
}

# Undo the 2-channel fold: back to 18 channels for the proposal layer.
layer {
  name: 'rpn_cls_prob_reshape'
  type: 'Reshape'
  bottom: 'rpn_cls_prob'
  top: 'rpn_cls_prob_reshape'
  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

# Python layer that turns RPN probabilities and box deltas into 'rpn_rois'.
layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rpn_rois'
#  top: 'rpn_scores'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
  }
}

#layer {
#  name: 'debug-data'
#  type: 'Python'
#  bottom: 'data'
#  bottom: 'rpn_rois'
#  bottom: 'rpn_scores'
#  python_param {
#    module: 'rpn.debug_layer'
#    layer: 'RPNDebugLayer'
#  }
#}

# Python layer that pairs proposals with ground truth and emits the RoIs,
# labels and box-regression targets used by the detection head.
# 'num_classes' is kept equal to the value given to 'input-data' and to
# cls_score's num_output (6); it previously read 21, left over from a 21-class
# template.
# NOTE(review): in py-faster-rcnn this layer sizes bbox_targets as
# 4 * num_classes columns - confirm against the local
# rpn/proposal_target_layer.py.
layer {
  name: 'roi-data'
  type: 'Python'
  bottom: 'rpn_rois'
  bottom: 'gt_boxes'
  top: 'rois'
  top: 'labels'
  top: 'bbox_targets'
  top: 'bbox_inside_weights'
  top: 'bbox_outside_weights'
  python_param {
    module: 'rpn.proposal_target_layer'
    layer: 'ProposalTargetLayer'
    param_str: "'num_classes': 6"
  }
}

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}

# Per-RoI class scores: one output per class.
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "fc7"
  top: "cls_score"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 6
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# Per-RoI, per-class box regression: 4 coordinates for each of the 6 classes.
# Previously 84 (= 4 * 21), which did not match the 6-class configuration used
# by 'input-data' and 'cls_score'.
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "fc7"
  top: "bbox_pred"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 24   # 4 * 6(classes)
    weight_filler {
      type: "gaussian"
      std: 0.001
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# Detection-head classification loss; no gradient is sent to the labels.
layer {
  name: "loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "cls_score"
  bottom: "labels"
  propagate_down: 1
  propagate_down: 0
  top: "loss_cls"
  loss_weight: 1
}

# Detection-head box-regression loss.
layer {
  name: "loss_bbox"
  type: "SmoothL1Loss"
  bottom: "bbox_pred"
  bottom: "bbox_targets"
  bottom: "bbox_inside_weights"
  bottom: "bbox_outside_weights"
  top: "loss_bbox"
  loss_weight: 1
}
--------------------------------------------------------------------------------