├── CMakeLists.txt ├── LICENSE ├── README.md ├── include └── centroidtracker.h ├── model ├── deploy.prototxt └── res10_300x300_ssd_iter_140000.caffemodel └── src ├── centroidtracker.cpp └── main.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(TRACKER) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | #ASAN flags 7 | #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer -g") 8 | #set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer -g") 9 | #set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer") 10 | #set(CMAKE_MODULE_LINKER_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer") 11 | 12 | find_package(OpenCV REQUIRED) 13 | 14 | include_directories(include) 15 | 16 | add_executable(TRACKER 17 | include/centroidtracker.h 18 | src/centroidtracker.cpp 19 | src/main.cpp) 20 | 21 | target_link_libraries(TRACKER PUBLIC ${OpenCV_LIBS}) 22 | 23 | 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Pratheek Balakrishna 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Centroid-Object-Tracking 2 | 3 | C++ implementation of the simple MOT from https://www.pyimagesearch.com/2018/07/23/simple-object-tracking-with-opencv/ 4 | 5 | #### Setup: 6 | - mkdir build 7 | - cd build 8 | - cmake .. 9 | - make 10 | - ./TRACKER 11 | -------------------------------------------------------------------------------- /include/centroidtracker.h: -------------------------------------------------------------------------------- 1 | /* 2 | Created by pratheek on 2019-11-27. 3 | */ 4 | 5 | #ifndef C___CENTROIDTRACKER_H 6 | #define C___CENTROIDTRACKER_H 7 | 8 | #endif //C___CENTROIDTRACKER_H 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | class CentroidTracker { 18 | public: 19 | explicit CentroidTracker(int maxDisappeared); 20 | 21 | void register_Object(int cX, int cY); 22 | 23 | std::vector>> update(std::vector> boxes); 24 | 25 | // 26 | std::vector>> objects; 27 | 28 | //make buffer for path tracking 29 | std::map>> path_keeper; 30 | private: 31 | int maxDisappeared; 32 | 33 | int nextObjectID; 34 | 35 | static double calcDistance(double x1, double y1, double x2, double y2); 36 | 37 | // 38 | std::map disappeared; 39 | }; 40 | -------------------------------------------------------------------------------- /model/deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | } 116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: "conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /model/res10_300x300_ssd_iter_140000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prat96/Centroid-Object-Tracking/ccac42ec893d0ba4421ba3a8cccdf408283f60b5/model/res10_300x300_ssd_iter_140000.caffemodel -------------------------------------------------------------------------------- /src/centroidtracker.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Created by pratheek on 2019-11-27. 3 | */ 4 | #include "centroidtracker.h" 5 | #include 6 | 7 | using namespace std; 8 | 9 | CentroidTracker::CentroidTracker(int maxDisappeared) { 10 | this->nextObjectID = 0; 11 | this->maxDisappeared = maxDisappeared; 12 | } 13 | 14 | double CentroidTracker::calcDistance(double x1, double y1, double x2, double y2) { 15 | double x = x1 - x2; 16 | double y = y1 - y2; 17 | double dist = sqrt((x * x) + (y * y)); //calculating Euclidean distance 18 | 19 | return dist; 20 | } 21 | 22 | void CentroidTracker::register_Object(int cX, int cY) { 23 | int object_ID = this->nextObjectID; 24 | this->objects.push_back({object_ID, {cX, cY}}); 25 | this->disappeared.insert({object_ID, 0}); 26 | this->nextObjectID += 1; 27 | } 28 | 29 | vector::size_type findMin(const vector &v, vector::size_type pos = 0) { 30 | if (v.size() <= pos) return (v.size()); 31 | vector::size_type min = pos; 32 | for (vector::size_type i = pos + 1; i < v.size(); i++) { 33 | if (v[i] < v[min]) min = i; 34 | } 35 | return (min); 36 | } 37 | 38 | std::vector>> CentroidTracker::update(vector> boxes) { 39 | if (boxes.empty()) { 40 | auto it = this->disappeared.begin(); 41 | while (it != this->disappeared.end()) { 42 | it->second++; 43 | if (it->second > this->maxDisappeared) { 44 | this->objects.erase(remove_if(this->objects.begin(), this->objects.end(), [it](auto &elem) { 45 | return elem.first == it->first; 46 | }), this->objects.end()); 47 | 48 | this->path_keeper.erase(it->first); 49 | 50 | it = this->disappeared.erase(it); 51 | } else { 52 | ++it; 53 | } 54 | } 55 | return this->objects; 56 | } 57 | 58 | // initialize an array of input centroids for the current frame 59 | vector> inputCentroids; 60 | for (auto b : boxes) { 61 | int cX = int((b[0] + b[2]) / 2.0); 62 | int cY = int((b[1] + b[3]) / 2.0); 63 | inputCentroids.push_back(make_pair(cX, cY)); 64 | } 65 | 66 | //if we are currently not tracking any objects take the input centroids and register each of them 67 | if (this->objects.empty()) { 68 | for (auto i: inputCentroids) { 69 | this->register_Object(i.first, i.second); 70 | } 71 | } 72 | 73 | // otherwise, there are currently tracking objects so we need to try to match the 74 | // input centroids to existing object centroids 75 | else { 76 | vector objectIDs; 77 | vector> objectCentroids; 78 | for (auto object : this->objects) { 79 | objectIDs.push_back(object.first); 80 | objectCentroids.push_back(make_pair(object.second.first, object.second.second)); 81 | } 82 | 83 | // Calculate Distances 84 | vector> Distances; 85 | for (int i = 0; i < objectCentroids.size(); ++i) { 86 | vector temp_D; 87 | for (vector>::size_type j = 0; j < inputCentroids.size(); ++j) { 88 | double dist = calcDistance(objectCentroids[i].first, objectCentroids[i].second, inputCentroids[j].first, 89 | inputCentroids[j].second); 90 | 91 | temp_D.push_back(dist); 92 | } 93 | Distances.push_back(temp_D); 94 | } 95 | 96 | // load rows and cols 97 | vector cols; 98 | vector rows; 99 | 100 | //find indices for cols 101 | for (auto v: Distances) { 102 | auto temp = findMin(v); 103 | cols.push_back(temp); 104 | } 105 | 106 | //rows calculation 107 | //sort each mat row for rows calculation 108 | vector> D_copy; 109 | for (auto v: Distances) { 110 | sort(v.begin(), v.end()); 111 | D_copy.push_back(v); 112 | } 113 | 114 | // use cols calc to find rows 115 | // slice first elem of each column 116 | vector> temp_rows; 117 | int k = 0; 118 | for (auto i: D_copy) { 119 | temp_rows.push_back(make_pair(i[0], k)); 120 | k++; 121 | } 122 | //print sorted indices of temp_rows 123 | for (auto const &x : temp_rows) { 124 | rows.push_back(x.second); 125 | } 126 | 127 | set usedRows; 128 | set usedCols; 129 | 130 | //loop over the combination of the (rows, columns) index tuples 131 | for (int i = 0; i < rows.size(); i++) { 132 | //if we have already examined either the row or column value before, ignore it 133 | if (usedRows.count(rows[i]) || usedCols.count(cols[i])) { continue; } 134 | //otherwise, grab the object ID for the current row, set its new centroid, 135 | // and reset the disappeared counter 136 | int objectID = objectIDs[rows[i]]; 137 | for (int t = 0; t < this->objects.size(); t++) { 138 | if (this->objects[t].first == objectID) { 139 | this->objects[t].second.first = inputCentroids[cols[i]].first; 140 | this->objects[t].second.second = inputCentroids[cols[i]].second; 141 | } 142 | } 143 | this->disappeared[objectID] = 0; 144 | 145 | usedRows.insert(rows[i]); 146 | usedCols.insert(cols[i]); 147 | } 148 | 149 | // compute indexes we have NOT examined yet 150 | set objRows; 151 | set inpCols; 152 | 153 | //D.shape[0] 154 | for (int i = 0; i < objectCentroids.size(); i++) { 155 | objRows.insert(i); 156 | } 157 | //D.shape[1] 158 | for (int i = 0; i < inputCentroids.size(); i++) { 159 | inpCols.insert(i); 160 | } 161 | 162 | set unusedRows; 163 | set unusedCols; 164 | 165 | set_difference(objRows.begin(), objRows.end(), usedRows.begin(), usedRows.end(), 166 | inserter(unusedRows, unusedRows.begin())); 167 | set_difference(inpCols.begin(), inpCols.end(), usedCols.begin(), usedCols.end(), 168 | inserter(unusedCols, unusedCols.begin())); 169 | 170 | 171 | //If objCentroids > InpCentroids, we need to check and see if some of these objects have potentially disappeared 172 | if (objectCentroids.size() >= inputCentroids.size()) { 173 | // loop over unused row indexes 174 | for (auto row: unusedRows) { 175 | int objectID = objectIDs[row]; 176 | this->disappeared[objectID] += 1; 177 | 178 | if (this->disappeared[objectID] > this->maxDisappeared) { 179 | this->objects.erase(remove_if(this->objects.begin(), this->objects.end(), [objectID](auto &elem) { 180 | return elem.first == objectID; 181 | }), this->objects.end()); 182 | 183 | this->path_keeper.erase(objectID); 184 | 185 | this->disappeared.erase(objectID); 186 | } 187 | } 188 | } else { 189 | for (auto col: unusedCols) { 190 | this->register_Object(inputCentroids[col].first, inputCentroids[col].second); 191 | } 192 | } 193 | } 194 | //loading path tracking points 195 | if (!objects.empty()) { 196 | for (auto obj: objects) { 197 | 198 | if (path_keeper[obj.first].size() > 30) { 199 | path_keeper[obj.first].erase(path_keeper[obj.first].begin()); 200 | } 201 | path_keeper[obj.first].push_back(make_pair(obj.second.first, obj.second.second)); 202 | } 203 | } 204 | 205 | 206 | return this->objects; 207 | } 208 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace cv; 8 | using namespace std; 9 | 10 | int main() { 11 | std::cout << "Hello, Tracker!" << std::endl; 12 | auto centroidTracker = new CentroidTracker(20); 13 | 14 | VideoCapture cap(0); 15 | // VideoCapture cap("../../test2.mp4"); 16 | if (!cap.isOpened()) { 17 | cout << "Cannot open camera"; 18 | } 19 | 20 | String modelTxt = "../model/deploy.prototxt"; 21 | String modelBin = "../model/res10_300x300_ssd_iter_140000.caffemodel"; 22 | 23 | cout << "Loading model.." << endl; 24 | auto net = dnn::readNetFromCaffe(modelTxt, modelBin); 25 | 26 | cout << "Starting video stream" << endl; 27 | while (cap.isOpened()) { 28 | Mat cameraFrame; 29 | cap.read(cameraFrame); 30 | 31 | resize(cameraFrame, cameraFrame, Size(400, 300)); 32 | auto inputBlob = dnn::blobFromImage(cameraFrame, 1.0, Size(400, 300), Scalar(104.0, 177.0, 123.0)); 33 | 34 | net.setInput(inputBlob); 35 | auto detection = net.forward("detection_out"); 36 | Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); 37 | 38 | vector> boxes; 39 | 40 | float confidenceThreshold = 0.2; 41 | for (int i = 0; i < detectionMat.rows; i++) { 42 | float confidence = detectionMat.at(i, 2); 43 | 44 | if (confidence > confidenceThreshold) { 45 | int xLeftTop = static_cast(detectionMat.at(i, 3) * cameraFrame.cols); 46 | int yLeftTop = static_cast(detectionMat.at(i, 4) * cameraFrame.rows); 47 | int xRightBottom = static_cast(detectionMat.at(i, 5) * cameraFrame.cols); 48 | int yRightBottom = static_cast(detectionMat.at(i, 6) * cameraFrame.rows); 49 | 50 | Rect object((int) xLeftTop, (int) yLeftTop, (int) (xRightBottom - xLeftTop), 51 | (int) (yRightBottom - yLeftTop)); 52 | rectangle(cameraFrame, object, Scalar(0, 255, 0), 2); 53 | 54 | boxes.insert(boxes.end(), {xLeftTop, yLeftTop, xRightBottom, yRightBottom}); 55 | } 56 | } 57 | 58 | auto objects = centroidTracker->update(boxes); 59 | 60 | if (!objects.empty()) { 61 | for (auto obj: objects) { 62 | circle(cameraFrame, Point(obj.second.first, obj.second.second), 4, Scalar(255, 0, 0), -1); 63 | string ID = std::to_string(obj.first); 64 | cv::putText(cameraFrame, ID, Point(obj.second.first - 10, obj.second.second - 10), 65 | FONT_HERSHEY_COMPLEX, 0.5, Scalar(0, 255, 0), 2); 66 | } 67 | 68 | //drawing the path 69 | for (auto obj: objects) { 70 | int k = 1; 71 | for (int i = 1; i < centroidTracker->path_keeper[obj.first].size(); i++) { 72 | int thickness = int(sqrt(20 / float(k + 1) * 2.5)); 73 | cv::line(cameraFrame, 74 | Point(centroidTracker->path_keeper[obj.first][i - 1].first, centroidTracker->path_keeper[obj.first][i - 1].second), 75 | Point(centroidTracker->path_keeper[obj.first][i].first, centroidTracker->path_keeper[obj.first][i].second), 76 | Scalar(0, 0, 255), thickness); 77 | k += 1; 78 | } 79 | } 80 | } 81 | imshow("Detection", cameraFrame); 82 | char c = (char) waitKey(15); 83 | if (c == 27) 84 | break; 85 | } 86 | delete centroidTracker; 87 | return 0; 88 | } --------------------------------------------------------------------------------