├── GOTURN_Tracker_OpenCV_Python
│   ├── .gitignore
│   ├── README.md
│   ├── goturn.prototxt
│   ├── main_rect.py
│   └── tracker.py
├── MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python
│   ├── .gitignore
│   ├── README.md
│   ├── main_MOT.py
│   ├── main_SOT.py
│   └── tracker.py
├── README.md
└── YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python
    ├── .gitignore
    ├── README.md
    ├── SiamRPNpp_tracker.py
    ├── main.py
    └── main_SiamRPNpp.py

/GOTURN_Tracker_OpenCV_Python/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/GOTURN_Tracker_OpenCV_Python/README.md:
--------------------------------------------------------------------------------
1 | # GOTURN Object Tracker
2 |
3 | `SOT` (Single Object Tracking) trackers supported by OpenCV:
4 | - Boosting
5 | - MIL
6 | - KCF
7 | - TLD
8 | - MedianFlow
9 | - `GOTURN`
10 | - MOSSE
11 | - CSRT
12 |
13 | ## Testing
14 |
15 | `ROI` (Region Of Interest) tracking
16 |
17 | ```bash
18 | python main_rect.py
19 | ```
20 |
21 | For `GOTURN`, first unzip `goturn.caffemodel.zip` to `goturn.caffemodel` (a selection sketch follows `main_rect.py` below).
--------------------------------------------------------------------------------
/GOTURN_Tracker_OpenCV_Python/goturn.prototxt:
--------------------------------------------------------------------------------
1 | name: "GOTURN"
2 |
3 | input: "data1"
4 | input_dim: 1
5 | input_dim: 3
6 | input_dim: 227
7 | input_dim: 227
8 |
9 | input: "data2"
10 | input_dim: 1
11 | input_dim: 3
12 | input_dim: 227
13 | input_dim: 227
14 |
15 | layer {
16 |   name: "conv11"
17 |   type: "Convolution"
18 |   bottom: "data1"
19 |   top: "conv11"
20 |   param {
21 |     lr_mult: 1
22 |     decay_mult: 1
23 |   }
24 |   param {
25 |     lr_mult: 2
26 |     decay_mult: 0
27 |   }
28 |   convolution_param {
29 |     num_output: 96
30 |     kernel_size: 11
31 |     stride: 4
32 |     weight_filler {
33 |       type: "gaussian"
34 |       std: 0.01
35 |     }
36 |     bias_filler {
37 |       type: "constant"
38 |       value: 0
39 |     }
40 |   }
41 | }
42 | layer {
43 |   name: "relu11"
44 |   type: "ReLU"
45 |   bottom: "conv11"
46 |   top: "conv11"
47 | }
48 | layer {
49 |   name: "pool11"
50 |   type: "Pooling"
51 |   bottom: "conv11"
52 |   top: "pool11"
53 |   pooling_param {
54 |     pool: MAX
55 |     kernel_size: 3
56 |     stride: 2
57 |   }
58 | }
59 | layer {
60 |   name: "norm11"
61 |   type: "LRN"
62 |   bottom: "pool11"
63 |   top: "norm11"
64 |   lrn_param {
65 |     local_size: 5
66 |     alpha: 0.0001
67 |     beta: 0.75
68 |   }
69 | }
70 | layer {
71 |   name: "conv12"
72 |   type: "Convolution"
73 |   bottom: "norm11"
74 |   top: "conv12"
75 |   param {
76 |     lr_mult: 1
77 |     decay_mult: 1
78 |   }
79 |   param {
80 |     lr_mult: 2
81 |     decay_mult: 0
82 |   }
83 |   convolution_param {
84 |     num_output: 256
85 |     pad: 2
86 |     kernel_size: 5
87 |     group: 2
88 |     weight_filler {
89 |       type: "gaussian"
90 |       std: 0.01
91 |     }
92 |     bias_filler {
93 |       type: "constant"
94 |       value: 1
95 |     }
96 |   }
97 | }
98 | layer {
99 |   name: "relu12"
100 |   type: "ReLU"
101 |   bottom:
"conv12" 102 | top: "conv12" 103 | } 104 | layer { 105 | name: "pool12" 106 | type: "Pooling" 107 | bottom: "conv12" 108 | top: "pool12" 109 | pooling_param { 110 | pool: MAX 111 | kernel_size: 3 112 | stride: 2 113 | } 114 | } 115 | layer { 116 | name: "norm12" 117 | type: "LRN" 118 | bottom: "pool12" 119 | top: "norm12" 120 | lrn_param { 121 | local_size: 5 122 | alpha: 0.0001 123 | beta: 0.75 124 | } 125 | } 126 | layer { 127 | name: "conv13" 128 | type: "Convolution" 129 | bottom: "norm12" 130 | top: "conv13" 131 | param { 132 | lr_mult: 1 133 | decay_mult: 1 134 | } 135 | param { 136 | lr_mult: 2 137 | decay_mult: 0 138 | } 139 | convolution_param { 140 | num_output: 384 141 | pad: 1 142 | kernel_size: 3 143 | weight_filler { 144 | type: "gaussian" 145 | std: 0.01 146 | } 147 | bias_filler { 148 | type: "constant" 149 | value: 0 150 | } 151 | } 152 | } 153 | layer { 154 | name: "relu13" 155 | type: "ReLU" 156 | bottom: "conv13" 157 | top: "conv13" 158 | } 159 | layer { 160 | name: "conv14" 161 | type: "Convolution" 162 | bottom: "conv13" 163 | top: "conv14" 164 | param { 165 | lr_mult: 1 166 | decay_mult: 1 167 | } 168 | param { 169 | lr_mult: 2 170 | decay_mult: 0 171 | } 172 | convolution_param { 173 | num_output: 384 174 | pad: 1 175 | kernel_size: 3 176 | group: 2 177 | weight_filler { 178 | type: "gaussian" 179 | std: 0.01 180 | } 181 | bias_filler { 182 | type: "constant" 183 | value: 1 184 | } 185 | } 186 | } 187 | layer { 188 | name: "relu14" 189 | type: "ReLU" 190 | bottom: "conv14" 191 | top: "conv14" 192 | } 193 | layer { 194 | name: "conv15" 195 | type: "Convolution" 196 | bottom: "conv14" 197 | top: "conv15" 198 | param { 199 | lr_mult: 1 200 | decay_mult: 1 201 | } 202 | param { 203 | lr_mult: 2 204 | decay_mult: 0 205 | } 206 | convolution_param { 207 | num_output: 256 208 | pad: 1 209 | kernel_size: 3 210 | group: 2 211 | weight_filler { 212 | type: "gaussian" 213 | std: 0.01 214 | } 215 | bias_filler { 216 | type: "constant" 217 | value: 1 218 | } 219 | } 220 | } 221 | layer { 222 | name: "relu15" 223 | type: "ReLU" 224 | bottom: "conv15" 225 | top: "conv15" 226 | } 227 | layer { 228 | name: "pool15" 229 | type: "Pooling" 230 | bottom: "conv15" 231 | top: "pool15" 232 | pooling_param { 233 | pool: MAX 234 | kernel_size: 3 235 | stride: 2 236 | } 237 | } 238 | 239 | 240 | layer { 241 | name: "conv21" 242 | type: "Convolution" 243 | bottom: "data2" 244 | top: "conv21" 245 | param { 246 | lr_mult: 1 247 | decay_mult: 1 248 | } 249 | param { 250 | lr_mult: 2 251 | decay_mult: 0 252 | } 253 | convolution_param { 254 | num_output: 96 255 | kernel_size: 11 256 | stride: 4 257 | weight_filler { 258 | type: "gaussian" 259 | std: 0.01 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0 264 | } 265 | } 266 | } 267 | layer { 268 | name: "relu21" 269 | type: "ReLU" 270 | bottom: "conv21" 271 | top: "conv21" 272 | } 273 | layer { 274 | name: "pool21" 275 | type: "Pooling" 276 | bottom: "conv21" 277 | top: "pool21" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 3 281 | stride: 2 282 | } 283 | } 284 | layer { 285 | name: "norm21" 286 | type: "LRN" 287 | bottom: "pool21" 288 | top: "norm21" 289 | lrn_param { 290 | local_size: 5 291 | alpha: 0.0001 292 | beta: 0.75 293 | } 294 | } 295 | layer { 296 | name: "conv22" 297 | type: "Convolution" 298 | bottom: "norm21" 299 | top: "conv22" 300 | param { 301 | lr_mult: 1 302 | decay_mult: 1 303 | } 304 | param { 305 | lr_mult: 2 306 | decay_mult: 0 307 | } 308 | convolution_param { 309 | num_output: 256 310 | pad: 2 311 
| kernel_size: 5 312 | group: 2 313 | weight_filler { 314 | type: "gaussian" 315 | std: 0.01 316 | } 317 | bias_filler { 318 | type: "constant" 319 | value: 1 320 | } 321 | } 322 | } 323 | layer { 324 | name: "relu22" 325 | type: "ReLU" 326 | bottom: "conv22" 327 | top: "conv22" 328 | } 329 | layer { 330 | name: "pool22" 331 | type: "Pooling" 332 | bottom: "conv22" 333 | top: "pool22" 334 | pooling_param { 335 | pool: MAX 336 | kernel_size: 3 337 | stride: 2 338 | } 339 | } 340 | layer { 341 | name: "norm22" 342 | type: "LRN" 343 | bottom: "pool22" 344 | top: "norm22" 345 | lrn_param { 346 | local_size: 5 347 | alpha: 0.0001 348 | beta: 0.75 349 | } 350 | } 351 | layer { 352 | name: "conv23" 353 | type: "Convolution" 354 | bottom: "norm22" 355 | top: "conv23" 356 | param { 357 | lr_mult: 1 358 | decay_mult: 1 359 | } 360 | param { 361 | lr_mult: 2 362 | decay_mult: 0 363 | } 364 | convolution_param { 365 | num_output: 384 366 | pad: 1 367 | kernel_size: 3 368 | weight_filler { 369 | type: "gaussian" 370 | std: 0.01 371 | } 372 | bias_filler { 373 | type: "constant" 374 | value: 0 375 | } 376 | } 377 | } 378 | layer { 379 | name: "relu23" 380 | type: "ReLU" 381 | bottom: "conv23" 382 | top: "conv23" 383 | } 384 | layer { 385 | name: "conv24" 386 | type: "Convolution" 387 | bottom: "conv23" 388 | top: "conv24" 389 | param { 390 | lr_mult: 1 391 | decay_mult: 1 392 | } 393 | param { 394 | lr_mult: 2 395 | decay_mult: 0 396 | } 397 | convolution_param { 398 | num_output: 384 399 | pad: 1 400 | kernel_size: 3 401 | group: 2 402 | weight_filler { 403 | type: "gaussian" 404 | std: 0.01 405 | } 406 | bias_filler { 407 | type: "constant" 408 | value: 1 409 | } 410 | } 411 | } 412 | layer { 413 | name: "relu24" 414 | type: "ReLU" 415 | bottom: "conv24" 416 | top: "conv24" 417 | } 418 | layer { 419 | name: "conv25" 420 | type: "Convolution" 421 | bottom: "conv24" 422 | top: "conv25" 423 | param { 424 | lr_mult: 1 425 | decay_mult: 1 426 | } 427 | param { 428 | lr_mult: 2 429 | decay_mult: 0 430 | } 431 | convolution_param { 432 | num_output: 256 433 | pad: 1 434 | kernel_size: 3 435 | group: 2 436 | weight_filler { 437 | type: "gaussian" 438 | std: 0.01 439 | } 440 | bias_filler { 441 | type: "constant" 442 | value: 1 443 | } 444 | } 445 | } 446 | layer { 447 | name: "relu25" 448 | type: "ReLU" 449 | bottom: "conv25" 450 | top: "conv25" 451 | } 452 | layer { 453 | name: "pool25" 454 | type: "Pooling" 455 | bottom: "conv25" 456 | top: "pool25" 457 | pooling_param { 458 | pool: MAX 459 | kernel_size: 3 460 | stride: 2 461 | } 462 | } 463 | 464 | layer { 465 | name: "concat1" 466 | type: "Concat" 467 | bottom: "pool15" 468 | bottom: "pool25" 469 | top: "poolConcat" 470 | } 471 | 472 | layer { 473 | name: "fc6" 474 | type: "InnerProduct" 475 | bottom: "poolConcat" 476 | top: "fc6" 477 | param { 478 | lr_mult: 1 479 | decay_mult: 1 480 | } 481 | param { 482 | lr_mult: 2 483 | decay_mult: 0 484 | } 485 | inner_product_param { 486 | num_output: 4096 487 | weight_filler { 488 | type: "gaussian" 489 | std: 0.005 490 | } 491 | bias_filler { 492 | type: "constant" 493 | value: 1 494 | } 495 | } 496 | } 497 | layer { 498 | name: "relu6" 499 | type: "ReLU" 500 | bottom: "fc6" 501 | top: "fc6" 502 | } 503 | layer { 504 | name: "drop6" 505 | type: "Dropout" 506 | bottom: "fc6" 507 | top: "fc6" 508 | dropout_param { 509 | dropout_ratio: 0.5 510 | } 511 | } 512 | layer { 513 | name: "fc7" 514 | type: "InnerProduct" 515 | bottom: "fc6" 516 | top: "fc7" 517 | param { 518 | lr_mult: 1 519 | decay_mult: 1 520 | } 521 | 
param { 522 | lr_mult: 2 523 | decay_mult: 0 524 | } 525 | inner_product_param { 526 | num_output: 4096 527 | weight_filler { 528 | type: "gaussian" 529 | std: 0.005 530 | } 531 | bias_filler { 532 | type: "constant" 533 | value: 1 534 | } 535 | } 536 | } 537 | layer { 538 | name: "relu7" 539 | type: "ReLU" 540 | bottom: "fc7" 541 | top: "fc7" 542 | } 543 | layer { 544 | name: "drop7" 545 | type: "Dropout" 546 | bottom: "fc7" 547 | top: "fc7" 548 | dropout_param { 549 | dropout_ratio: 0.5 550 | } 551 | } 552 | layer { 553 | name: "fc8" 554 | type: "InnerProduct" 555 | bottom: "fc7" 556 | top: "fc8" 557 | param { 558 | lr_mult: 1 559 | decay_mult: 1 560 | } 561 | param { 562 | lr_mult: 2 563 | decay_mult: 0 564 | } 565 | inner_product_param { 566 | num_output: 4 567 | weight_filler { 568 | type: "gaussian" 569 | std: 0.01 570 | } 571 | bias_filler { 572 | type: "constant" 573 | value: 0 574 | } 575 | } 576 | } 577 | layer { 578 | name: "scale" 579 | bottom: "fc8" 580 | top: "out" 581 | type: "Power" 582 | power_param { 583 | power: 1 584 | scale: 10 585 | shift: 0 586 | } 587 | } 588 | -------------------------------------------------------------------------------- /GOTURN_Tracker_OpenCV_Python/main_rect.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import time 4 | from tracker import * 5 | 6 | if __name__ == "__main__": 7 | 8 | # Open video file 9 | capture = cv2.VideoCapture("../Test_Video_Files/cars.mp4") 10 | 11 | if not capture.isOpened(): 12 | print("Cannot open video file") 13 | sys.exit() 14 | 15 | ok, frame = capture.read() 16 | if not ok: 17 | print("Cannot read video file") 18 | sys.exit() 19 | 20 | # Initialize calculating FPS 21 | start = time.time_ns() 22 | frame_count = 0 23 | fps = -1 24 | 25 | # Define an initial bounding box 26 | bounding_box = (10, 10, 100, 100) 27 | 28 | # Select a bounding box 29 | bounding_box = cv2.selectROI(frame, False) 30 | 31 | # Create a OpenCV tracker and Initialize tracker with first frame and bounding box 32 | tracker = OpenCVTracker() 33 | ok = tracker.init(frame, bounding_box) 34 | 35 | while True: 36 | # Read a new frame 37 | ok, frame = capture.read() 38 | if not ok: 39 | break 40 | 41 | # Increase frame count 42 | frame_count += 1 43 | 44 | # Update tracker 45 | ok, bounding_box = tracker.update(frame) 46 | 47 | # Draw bounding box 48 | if ok: 49 | # Tracking success 50 | p1 = (int(bounding_box[0]), int(bounding_box[1])) 51 | p2 = (int(bounding_box[0] + bounding_box[2]), int(bounding_box[1] + bounding_box[3])) 52 | cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1) 53 | else: 54 | # Tracking failure 55 | cv2.putText(frame, "Tracking failure detected", (50, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2) 56 | 57 | # Calculate Frames per second (FPS) 58 | if frame_count >= 30: 59 | end = time.time_ns() 60 | fps = 1000000000 * frame_count / (end - start) 61 | frame_count = 0 62 | start = time.time_ns() 63 | 64 | # Display tracker type on frame 65 | cv2.putText(frame, tracker.type(), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2) 66 | 67 | # Display FPS on frame 68 | if fps > 0: 69 | fps_label = "FPS: %.2f" % fps 70 | cv2.putText(frame, fps_label, (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2) 71 | 72 | # Display result 73 | cv2.imshow("Tracking", frame) 74 | 75 | # Exit if ESC pressed 76 | key = cv2.waitKey(1) & 0xFF 77 | if key == 27: 78 | break 79 | -------------------------------------------------------------------------------- 
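Note: `main_rect.py` above constructs `OpenCVTracker()` with its default type (`CV_TRACKER_KCF`). A minimal sketch of selecting `GOTURN` instead, using only names defined in `tracker.py` below; it assumes `goturn.prototxt` and the unzipped `goturn.caffemodel` sit in the working directory, which is where OpenCV's `cv2.TrackerGOTURN_create()` looks for them by default:

```python
from tracker import OpenCVTracker, CV_TRACKER_GOTURN

# Select the GOTURN deep-learning tracker instead of the default KCF.
# cv2.TrackerGOTURN_create() loads goturn.prototxt / goturn.caffemodel
# from the current working directory.
tracker = OpenCVTracker(CV_TRACKER_GOTURN)

ok = tracker.init(frame, bounding_box)    # same init/update API main_rect.py uses
ok, bounding_box = tracker.update(frame)  # ok is False when tracking fails
```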
/GOTURN_Tracker_OpenCV_Python/tracker.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # OpenCV tracker types 4 | CV_TRACKER_BOOSTING = "CV_TRACKER_BOOSTING" 5 | CV_TRACKER_MIL = "CV_TRACKER_MIL" 6 | CV_TRACKER_KCF = "CV_TRACKER_KCF" 7 | CV_TRACKER_TLD = "CV_TRACKER_TLD" 8 | CV_TRACKER_MEDIANFLOW = "CV_TRACKER_MEDIANFLOW" 9 | CV_TRACKER_GOTURN = "CV_TRACKER_GOTURN" 10 | CV_TRACKER_MOSSE = "CV_TRACKER_MOSSE" 11 | CV_TRACKER_CSRT = "CV_TRACKER_CSRT" 12 | 13 | def compare_opencv_version(major, minor, revision): 14 | (current_major, current_minor, current_revision) = cv2.__version__.split(".") 15 | 16 | current_major = int(current_major) 17 | current_minor = int(current_minor) 18 | current_revision = int(current_revision) 19 | 20 | if current_major > major: 21 | return 1 22 | elif current_major < major: 23 | return -1 24 | 25 | if current_minor > minor: 26 | return 1 27 | elif current_minor < minor: 28 | return -1 29 | 30 | if current_revision > revision: 31 | return 1 32 | elif current_revision < revision: 33 | return -1 34 | 35 | return 0 36 | 37 | 38 | class OpenCVTracker: 39 | 40 | def __init__(self, tracker_type = CV_TRACKER_KCF): 41 | self._type = tracker_type 42 | 43 | # if int(minor_ver) < 3: 44 | if compare_opencv_version(4, 3, 0) < 0: 45 | self._tracker = cv2.Tracker_create(tracker_type) 46 | else: 47 | if tracker_type == CV_TRACKER_BOOSTING: 48 | if compare_opencv_version(4, 5, 1) <= 0: 49 | self._tracker = cv2.TrackerBoosting_create() 50 | else: 51 | self._tracker = cv2.legacy.upgradeTrackingAPI(cv2.legacy.TrackerBoosting_create()) 52 | 53 | if tracker_type == CV_TRACKER_MIL: 54 | self._tracker = cv2.TrackerMIL_create() 55 | 56 | if tracker_type == CV_TRACKER_KCF: 57 | self._tracker = cv2.TrackerKCF_create() 58 | 59 | if tracker_type == CV_TRACKER_TLD: 60 | if compare_opencv_version(4, 5, 1) <= 0: 61 | self._tracker = cv2.TrackerTLD_create() 62 | else: 63 | self._tracker = cv2.legacy.upgradeTrackingAPI(cv2.legacy.TrackerTLD_create()) 64 | 65 | if tracker_type == CV_TRACKER_MEDIANFLOW: 66 | if compare_opencv_version(4, 5, 1) <= 0: 67 | self._tracker = cv2.TrackerMedianFlow_create() 68 | else: 69 | self._tracker = cv2.legacy.upgradeTrackingAPI(cv2.legacy.TrackerMedianFlow_create()) 70 | 71 | if tracker_type == CV_TRACKER_GOTURN: 72 | self._tracker = cv2.TrackerGOTURN_create() 73 | 74 | if tracker_type == CV_TRACKER_MOSSE: 75 | if compare_opencv_version(4, 5, 1) <= 0: 76 | self._tracker = cv2.TrackerMOSSE_create() 77 | else: 78 | self._tracker = cv2.legacy.upgradeTrackingAPI(cv2.legacy.TrackerMOSSE_create()) 79 | 80 | if tracker_type == CV_TRACKER_CSRT: 81 | self._tracker = cv2.TrackerCSRT_create() 82 | 83 | def init(self, image, bounding_box): 84 | return self._tracker.init(image, bounding_box) 85 | 86 | def update(self, image): 87 | return self._tracker.update(image) 88 | 89 | def type(self): 90 | return self._type 91 | -------------------------------------------------------------------------------- /MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 
| *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python/README.md:
--------------------------------------------------------------------------------
1 | # MOG2 Object Detector and Euclidean Distance Tracker
2 |
3 | ## Detector
4 | - `MOG2` Background Subtractor
5 | - `KNN` Background Subtractor
6 |
7 | ## Tracker
8 | - `Euclidean Distance` (Centroid)
9 |
10 | ## Testing
11 |
12 | MOT (Multiple Object Tracking)
13 |
14 | ```bash
15 | python main_MOT.py
16 | ```
17 |
18 | SOT (Single Object Tracking) by clicking an object
19 |
20 | ```bash
21 | python main_SOT.py
22 | ```
--------------------------------------------------------------------------------
/MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python/main_MOT.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import sys
3 | import time
4 | from tracker import *
5 |
6 | if __name__ == "__main__":
7 |
8 |     # Create tracker object
9 |     tracker = EuclideanDistanceTracker()
10 |
11 |     capture = cv2.VideoCapture("../Test_Video_Files/highway.mp4")
12 |
13 |     if not capture.isOpened():
14 |         print("Cannot open video file")
15 |         sys.exit()
16 |
17 |     ok, frame = capture.read()
18 |     if not ok:
19 |         print("Cannot read video file")
20 |         sys.exit()
21 |
22 |     # Object detection
23 |     object_detector = cv2.createBackgroundSubtractorMOG2(history = 100, varThreshold = 16)
24 |     # object_detector = cv2.createBackgroundSubtractorKNN()
25 |
26 |     # Initialize calculating FPS
27 |     start = time.time_ns()
28 |     frame_count = 0
29 |     fps = -1
30 |
31 |     while True:
32 |         # Read a new frame
33 |         ok, frame = capture.read()
34 |         if not ok:
35 |             break
36 |
37 |         # Increase frame count
38 |         frame_count += 1
39 |         # timer = cv2.getTickCount()
40 |
41 |         # Extract region of interest
42 |         roi = frame[340:720, 500:800]
43 |         # roi = frame
44 |
45 |         # 1. Object Detection
46 |         mask = object_detector.apply(roi)
47 |         _, mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY)
48 |         contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
49 |
50 |         detections = []
51 |         for contour in contours:
52 |             # Calculate area and remove small elements
53 |             area = cv2.contourArea(contour)
54 |             if area > 100:
55 |                 # cv2.drawContours(roi, [contour], -1, (0, 255, 0), 2)
56 |                 x, y, w, h = cv2.boundingRect(contour)
57 |
58 |                 detections.append([x, y, w, h])
59 |
60 |         # 2.
Object Tracking 61 | boxes_ids = tracker.update(detections) 62 | for box_id in boxes_ids: 63 | x, y, w, h, id = box_id 64 | cv2.putText(roi, str(id), (x, y - 15), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2) 65 | cv2.rectangle(roi, (x, y), (x + w, y + h), (0, 255, 0), 3) 66 | 67 | # Calculate frames per second (FPS) 68 | # cv_fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer) 69 | if frame_count >= 30: 70 | end = time.time_ns() 71 | fps = 1000000000 * frame_count / (end - start) 72 | frame_count = 0 73 | start = time.time_ns() 74 | 75 | # Display FPS on frame 76 | # cv2.putText(frame, "FPS: " + str(int(cv_fps)), (100, 50), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2) 77 | if fps > 0: 78 | fps_label = "FPS: %.2f" % fps 79 | cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2) 80 | 81 | cv2.imshow("roi", roi) 82 | cv2.imshow("Frame", frame) 83 | cv2.imshow("Mask", mask) 84 | 85 | key = cv2.waitKey(30) 86 | if key == 27: 87 | break 88 | 89 | capture.release() 90 | cv2.destroyAllWindows() 91 | -------------------------------------------------------------------------------- /MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python/main_SOT.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import time 4 | from tracker import * 5 | 6 | if __name__== "__main__": 7 | 8 | # Create tracker object 9 | tracker = EuclideanDistanceTracker() 10 | 11 | capture = cv2.VideoCapture("../Test_Video_Files/highway.mp4") 12 | 13 | if not capture.isOpened(): 14 | print("Cannot open video file") 15 | sys.exit() 16 | 17 | ok, frame = capture.read() 18 | if not ok: 19 | print("Cannot read video file") 20 | sys.exit() 21 | 22 | # Object detection 23 | object_detector = cv2.createBackgroundSubtractorMOG2(history = 100, varThreshold = 16) 24 | # object_detector = cv2.createBackgroundSubtractorKNN() 25 | 26 | # Initialize calculating FPS 27 | start = time.time_ns() 28 | frame_count = 0 29 | fps = -1 30 | 31 | # Set mouse click callback function 32 | object_clicked = False 33 | object_selected = False 34 | ox, oy = -1, -1 35 | selected_object_ids = [] 36 | 37 | def mouse_click(event, x, y, flags, param): 38 | global ox, oy, object_clicked, object_selected, selected_object_ids 39 | if event == cv2.EVENT_LBUTTONDOWN: 40 | ox, oy = x, y 41 | object_clicked = True 42 | object_selected = False 43 | selected_object_ids = [] 44 | 45 | cv2.namedWindow("Frame") 46 | cv2.setMouseCallback("Frame", mouse_click) 47 | 48 | while True: 49 | # Read a new frame 50 | ok, frame = capture.read() 51 | if not ok: 52 | break 53 | 54 | # Increase frame count 55 | frame_count += 1 56 | 57 | # 1. Object Detection 58 | mask = object_detector.apply(frame) 59 | _, mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY) 60 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 61 | 62 | detections = [] 63 | for contour in contours: 64 | # Calculate area and remove small elements 65 | area = cv2.contourArea(contour) 66 | if area > 50: 67 | # cv2.drawContours(frame, [contour], -1, (0, 255, 0), 2) 68 | x, y, w, h = cv2.boundingRect(contour) 69 | 70 | detections.append([x, y, w, h]) 71 | 72 | # 2. 
Object Tracking 73 | boxes_ids = tracker.update(detections) 74 | 75 | if object_clicked or object_selected: 76 | if object_clicked and not object_selected: 77 | selected_object_ids = [] 78 | 79 | for box_id in boxes_ids: 80 | x, y, w, h, id = box_id 81 | if ox >= x and ox <= x + w and oy >= y and oy <= y + h: 82 | selected_object_ids.append(id) 83 | break 84 | 85 | object_selected = True 86 | object_clicked = False 87 | 88 | if object_selected and len(selected_object_ids) > 0: 89 | for box_id in boxes_ids: 90 | x, y, w, h, id = box_id 91 | if id in selected_object_ids: 92 | # cv2.putText(frame, str(id), (x, y - 15), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2) 93 | cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3) 94 | 95 | # Calculate frames per second (FPS) 96 | if frame_count >= 30: 97 | end = time.time_ns() 98 | fps = 1000000000 * frame_count / (end - start) 99 | frame_count = 0 100 | start = time.time_ns() 101 | 102 | # Display FPS on frame 103 | if fps > 0: 104 | fps_label = "FPS: %.2f" % fps 105 | cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2) 106 | 107 | cv2.imshow("Frame", frame) 108 | 109 | key = cv2.waitKey(30) 110 | if key == 27: 111 | break 112 | 113 | capture.release() 114 | cv2.destroyAllWindows() 115 | -------------------------------------------------------------------------------- /MOG2_Detector_Euclidean_Distance_Tracker_OpenCV_Python/tracker.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | class EuclideanDistanceTracker: 4 | 5 | def __init__(self): 6 | # Store the center positions of the objects 7 | self.center_points = {} 8 | 9 | # Keep the count of the IDs 10 | # Each time a new object is detected, the count will increase by one 11 | self.id_count = 0 12 | 13 | def update(self, object_boxes): 14 | # Object boxes and ids 15 | object_boxes_ids = [] 16 | 17 | # Get center point of new object 18 | for object_box in object_boxes: 19 | x, y, w, h = object_box 20 | cx = (x + x + w) // 2 21 | cy = (y + y + h) // 2 22 | 23 | # Find out if that object was detected already 24 | same_object_detected = False 25 | for id, center_point in self.center_points.items(): 26 | distance = math.hypot(cx - center_point[0], cy - center_point[1]) 27 | 28 | if distance < 25: 29 | self.center_points[id] = (cx, cy) 30 | 31 | # print(self.center_points) 32 | 33 | object_boxes_ids.append([x, y, w, h, id]) 34 | same_object_detected = True 35 | break 36 | 37 | # New object is detected, we assign the ID to that object 38 | if same_object_detected is False: 39 | self.center_points[self.id_count] = (cx, cy) 40 | object_boxes_ids.append([x, y, w, h, self.id_count]) 41 | self.id_count += 1 42 | 43 | # Clean the dictionary by center points to remove IDs not used anymore 44 | new_center_points = {} 45 | for object_box_id in object_boxes_ids: 46 | _, _, _, _, object_id = object_box_id 47 | center_point = self.center_points[object_id] 48 | new_center_points[object_id] = center_point 49 | 50 | # Update dictionary with IDs not used removed 51 | self.center_points = new_center_points.copy() 52 | 53 | return object_boxes_ids 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO(v5, v8), Yunet Object detection & Tracking 2 | 3 | ## Testing Environment 4 | 5 | Tested with the below environment. 
6 | - Ubuntu 22.04
7 | - Python 3.10.6
8 | - pip 22.0.2
9 | - opencv-contrib-python 4.7.0.72
10 |
11 | ## Credits
12 | by vman241308
13 |
--------------------------------------------------------------------------------
/YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python/README.md:
--------------------------------------------------------------------------------
1 | # YOLO Detector and SiamRPN++ Tracker
2 |
3 | ## Detector
4 | - `YOLOv5s`
5 | - `YOLOv8s`
6 |
7 | ## Tracker
8 | - `SiamRPN++` tracker
9 |
10 | ## Testing
11 |
12 | Pass `cuda` as the first command-line argument to run on CUDA.
13 | To test `SiamRPN++`, first unzip `search_net.onnx.zip` and `target_net.onnx.zip`.
14 |
15 | `ROI` (Region Of Interest) tracking
16 |
17 | ```bash
18 | python main_SiamRPNpp.py
19 | ```
20 |
21 | SOT by clicking an object
22 |
23 | ```bash
24 | python main.py
25 | ```
--------------------------------------------------------------------------------
/YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python/SiamRPNpp_tracker.py:
--------------------------------------------------------------------------------
1 | import cv2 as cv
2 | import numpy as np
3 | import os
4 |
5 | """
6 | Link to original paper : https://arxiv.org/abs/1812.11703
7 | Link to original repo : https://github.com/STVIR/pysot
8 |
9 | You can download the pre-trained weights of the Tracker Model from https://drive.google.com/file/d/11bwgPFVkps9AH2NOD1zBDdpF_tQghAB-/view?usp=sharing
10 | You can download the target net (target branch of SiamRPN++) from https://drive.google.com/file/d/1dw_Ne3UMcCnFsaD6xkZepwE4GEpqq7U_/view?usp=sharing
11 | You can download the search net (search branch of SiamRPN++) from https://drive.google.com/file/d/1Lt4oE43ZSucJvze3Y-Z87CVDreO-Afwl/view?usp=sharing
12 | You can download the head model (RPN Head) from https://drive.google.com/file/d/1zT1yu12mtj3JQEkkfKFJWiZ71fJ-dQTi/view?usp=sharing
13 | """
14 |
15 | class ModelBuilder():
16 |     """ This class generates the SiamRPN++ tracker model from the imported ONNX nets
17 |     """
18 |     def __init__(self, target_net, search_net, rpn_head):
19 |         super(ModelBuilder, self).__init__()
20 |         # Build the target branch
21 |         self.target_net = target_net
22 |         # Build the search branch
23 |         self.search_net = search_net
24 |         # Build RPN_Head
25 |         self.rpn_head = rpn_head
26 |
27 |     def template(self, z):
28 |         """ Takes the template of size (1, 3, 127, 127) as an input to generate the kernel
29 |         """
30 |         self.target_net.setInput(z)
31 |         outNames = self.target_net.getUnconnectedOutLayersNames()
32 |         self.zfs_1, self.zfs_2, self.zfs_3 =
self.target_net.forward(outNames) 33 | 34 | def track(self, x): 35 | """ Takes the search of size (1, 1, 255, 255) as an input to generate classification score and bounding box regression 36 | """ 37 | self.search_net.setInput(x) 38 | outNames = self.search_net.getUnconnectedOutLayersNames() 39 | xfs_1, xfs_2, xfs_3 = self.search_net.forward(outNames) 40 | self.rpn_head.setInput(np.stack([self.zfs_1, self.zfs_2, self.zfs_3]), 'input_1') 41 | self.rpn_head.setInput(np.stack([xfs_1, xfs_2, xfs_3]), 'input_2') 42 | outNames = self.rpn_head.getUnconnectedOutLayersNames() 43 | cls, loc = self.rpn_head.forward(outNames) 44 | return {'cls': cls, 'loc': loc} 45 | 46 | class Anchors: 47 | """ This class generate anchors. 48 | """ 49 | def __init__(self, stride, ratios, scales, image_center=0, size=0): 50 | self.stride = stride 51 | self.ratios = ratios 52 | self.scales = scales 53 | self.image_center = image_center 54 | self.size = size 55 | self.anchor_num = len(self.scales) * len(self.ratios) 56 | self.anchors = self.generate_anchors() 57 | 58 | def generate_anchors(self): 59 | """ 60 | generate anchors based on predefined configuration 61 | """ 62 | anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 63 | size = self.stride**2 64 | count = 0 65 | for r in self.ratios: 66 | ws = int(np.sqrt(size * 1. / r)) 67 | hs = int(ws * r) 68 | 69 | for s in self.scales: 70 | w = ws * s 71 | h = hs * s 72 | anchors[count][:] = [-w * 0.5, -h * 0.5, w * 0.5, h * 0.5][:] 73 | count += 1 74 | return anchors 75 | 76 | class SiamRPNppTracker: 77 | def __init__(self, is_cuda, target_net_file = None, search_net_file = None, rpn_head_file = None): 78 | super(SiamRPNppTracker, self).__init__() 79 | self.anchor_stride = 8 80 | self.anchor_ratios = [0.33, 0.5, 1, 2, 3] 81 | self.anchor_scales = [8] 82 | self.track_base_size = 8 83 | self.track_context_amount = 0.5 84 | self.track_exemplar_size = 127 85 | self.track_instance_size = 255 86 | self.track_lr = 0.4 87 | self.track_penalty_k = 0.04 88 | self.track_window_influence = 0.44 89 | self.score_size = (self.track_instance_size - self.track_exemplar_size) // \ 90 | self.anchor_stride + 1 + self.track_base_size 91 | self.anchor_num = len(self.anchor_ratios) * len(self.anchor_scales) 92 | hanning = np.hanning(self.score_size) 93 | window = np.outer(hanning, hanning) 94 | self.window = np.tile(window.flatten(), self.anchor_num) 95 | self.anchors = self.generate_anchor(self.score_size) 96 | 97 | if target_net_file is None: 98 | target_net_file = os.path.join(os.path.dirname(__file__), "models/target_net.onnx") 99 | if search_net_file is None: 100 | search_net_file = os.path.join(os.path.dirname(__file__), "models/search_net.onnx") 101 | if rpn_head_file is None: 102 | rpn_head_file = os.path.join(os.path.dirname(__file__), "models/rpn_head.onnx") 103 | 104 | target_net = cv.dnn.readNetFromONNX(target_net_file) 105 | search_net = cv.dnn.readNetFromONNX(search_net_file) 106 | rpn_head = cv.dnn.readNetFromONNX(rpn_head_file) 107 | 108 | if is_cuda: 109 | target_net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) 110 | target_net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16) 111 | search_net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) 112 | search_net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16) 113 | rpn_head.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) 114 | rpn_head.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16) 115 | else: 116 | target_net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) 117 | target_net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) 118 | 
search_net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) 119 | search_net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) 120 | rpn_head.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) 121 | rpn_head.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) 122 | 123 | self.model = ModelBuilder(target_net, search_net, rpn_head) 124 | 125 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): 126 | """ 127 | Args: 128 | im: bgr based input image frame 129 | pos: position of the center of the frame 130 | model_sz: exemplar / target image size 131 | s_z: original / search image size 132 | avg_chans: channel average 133 | Return: 134 | im_patch: sub_windows for the given image input 135 | """ 136 | if isinstance(pos, float): 137 | pos = [pos, pos] 138 | sz = original_sz 139 | im_h, im_w, im_d = im.shape 140 | c = (original_sz + 1) / 2 141 | cx, cy = pos 142 | context_xmin = np.floor(cx - c + 0.5) 143 | context_xmax = context_xmin + sz - 1 144 | context_ymin = np.floor(cy - c + 0.5) 145 | context_ymax = context_ymin + sz - 1 146 | left_pad = int(max(0., -context_xmin)) 147 | top_pad = int(max(0., -context_ymin)) 148 | right_pad = int(max(0., context_xmax - im_w + 1)) 149 | bottom_pad = int(max(0., context_ymax - im_h + 1)) 150 | context_xmin += left_pad 151 | context_xmax += left_pad 152 | context_ymin += top_pad 153 | context_ymax += top_pad 154 | 155 | if any([top_pad, bottom_pad, left_pad, right_pad]): 156 | size = (im_h + top_pad + bottom_pad, im_w + left_pad + right_pad, im_d) 157 | te_im = np.zeros(size, np.uint8) 158 | te_im[top_pad:top_pad + im_h, left_pad:left_pad + im_w, :] = im 159 | if top_pad: 160 | te_im[0:top_pad, left_pad:left_pad + im_w, :] = avg_chans 161 | if bottom_pad: 162 | te_im[im_h + top_pad:, left_pad:left_pad + im_w, :] = avg_chans 163 | if left_pad: 164 | te_im[:, 0:left_pad, :] = avg_chans 165 | if right_pad: 166 | te_im[:, im_w + left_pad:, :] = avg_chans 167 | im_patch = te_im[int(context_ymin):int(context_ymax + 1), 168 | int(context_xmin):int(context_xmax + 1), :] 169 | else: 170 | im_patch = im[int(context_ymin):int(context_ymax + 1), 171 | int(context_xmin):int(context_xmax + 1), :] 172 | 173 | if not np.array_equal(model_sz, original_sz): 174 | im_patch = cv.resize(im_patch, (model_sz, model_sz)) 175 | im_patch = im_patch.transpose(2, 0, 1) 176 | im_patch = im_patch[np.newaxis, :, :, :] 177 | im_patch = im_patch.astype(np.float32) 178 | return im_patch 179 | 180 | def generate_anchor(self, score_size): 181 | """ 182 | Args: 183 | im: bgr based input image frame 184 | pos: position of the center of the frame 185 | model_sz: exemplar / target image size 186 | s_z: original / search image size 187 | avg_chans: channel average 188 | Return: 189 | anchor: anchors for pre-determined values of stride, ratio, and scale 190 | """ 191 | anchors = Anchors(self.anchor_stride, self.anchor_ratios, self.anchor_scales) 192 | anchor = anchors.anchors 193 | x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3] 194 | anchor = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1) 195 | total_stride = anchors.stride 196 | anchor_num = anchors.anchor_num 197 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 198 | ori = - (score_size // 2) * total_stride 199 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 200 | [ori + total_stride * dy for dy in range(score_size)]) 201 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 202 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 203 | anchor[:, 0], 
anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 204 | return anchor 205 | 206 | def _convert_bbox(self, delta, anchor): 207 | """ 208 | Args: 209 | delta: localisation 210 | anchor: anchor of pre-determined anchor size 211 | Return: 212 | delta: prediction of bounding box 213 | """ 214 | delta_transpose = np.transpose(delta, (1, 2, 3, 0)) 215 | delta_contig = np.ascontiguousarray(delta_transpose) 216 | delta = delta_contig.reshape(4, -1) 217 | delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0] 218 | delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1] 219 | delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2] 220 | delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3] 221 | return delta 222 | 223 | def _softmax(self, x): 224 | """ 225 | Softmax in the direction of the depth of the layer 226 | """ 227 | x = x.astype(dtype=np.float32) 228 | x_max = x.max(axis=1)[:, np.newaxis] 229 | e_x = np.exp(x-x_max) 230 | div = np.sum(e_x, axis=1)[:, np.newaxis] 231 | y = e_x / div 232 | return y 233 | 234 | def _convert_score(self, score): 235 | """ 236 | Args: 237 | cls: score 238 | Return: 239 | cls: score for cls 240 | """ 241 | score_transpose = np.transpose(score, (1, 2, 3, 0)) 242 | score_con = np.ascontiguousarray(score_transpose) 243 | score_view = score_con.reshape(2, -1) 244 | score = np.transpose(score_view, (1, 0)) 245 | score = self._softmax(score) 246 | return score[:,1] 247 | 248 | def _bbox_clip(self, cx, cy, width, height, boundary): 249 | """ 250 | Adjusting the bounding box 251 | """ 252 | bbox_h, bbox_w = boundary 253 | cx = max(0, min(cx, bbox_w)) 254 | cy = max(0, min(cy, bbox_h)) 255 | width = max(10, min(width, bbox_w)) 256 | height = max(10, min(height, bbox_h)) 257 | return cx, cy, width, height 258 | 259 | def init(self, img, bbox): 260 | """ 261 | Args: 262 | img(np.ndarray): bgr based input image frame 263 | bbox: (x, y, w, h): bounding box 264 | """ 265 | x, y, w, h = bbox 266 | self.center_pos = np.array([x + (w - 1) / 2, y + (h - 1) / 2]) 267 | self.h = h 268 | self.w = w 269 | w_z = self.w + self.track_context_amount * np.add(h, w) 270 | h_z = self.h + self.track_context_amount * np.add(h, w) 271 | s_z = round(np.sqrt(w_z * h_z)) 272 | self.channel_average = np.mean(img, axis=(0, 1)) 273 | z_crop = self.get_subwindow(img, self.center_pos, self.track_exemplar_size, s_z, self.channel_average) 274 | self.model.template(z_crop) 275 | 276 | def track(self, img): 277 | """ 278 | Args: 279 | img(np.ndarray): BGR image 280 | Return: 281 | bbox(list):[x, y, width, height] 282 | """ 283 | w_z = self.w + self.track_context_amount * np.add(self.w, self.h) 284 | h_z = self.h + self.track_context_amount * np.add(self.w, self.h) 285 | s_z = np.sqrt(w_z * h_z) 286 | scale_z = self.track_exemplar_size / s_z 287 | s_x = s_z * (self.track_instance_size / self.track_exemplar_size) 288 | x_crop = self.get_subwindow(img, self.center_pos, self.track_instance_size, round(s_x), self.channel_average) 289 | outputs = self.model.track(x_crop) 290 | score = self._convert_score(outputs['cls']) 291 | pred_bbox = self._convert_bbox(outputs['loc'], self.anchors) 292 | 293 | def change(r): 294 | return np.maximum(r, 1. 
/ r) 295 | 296 | def sz(w, h): 297 | pad = (w + h) * 0.5 298 | return np.sqrt((w + pad) * (h + pad)) 299 | 300 | # scale penalty 301 | s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) / 302 | (sz(self.w * scale_z, self.h * scale_z))) 303 | 304 | # aspect ratio penalty 305 | r_c = change((self.w / self.h) / 306 | (pred_bbox[2, :] / pred_bbox[3, :])) 307 | penalty = np.exp(-(r_c * s_c - 1) * self.track_penalty_k) 308 | pscore = penalty * score 309 | 310 | # window penalty 311 | pscore = pscore * (1 - self.track_window_influence) + \ 312 | self.window * self.track_window_influence 313 | best_idx = np.argmax(pscore) 314 | bbox = pred_bbox[:, best_idx] / scale_z 315 | lr = penalty[best_idx] * score[best_idx] * self.track_lr 316 | 317 | cpx, cpy = self.center_pos 318 | x,y,w,h = bbox 319 | cx = x + cpx 320 | cy = y + cpy 321 | 322 | # smooth bbox 323 | width = self.w * (1 - lr) + w * lr 324 | height = self.h * (1 - lr) + h * lr 325 | 326 | # clip boundary 327 | cx, cy, width, height = self._bbox_clip(cx, cy, width, height, img.shape[:2]) 328 | 329 | # update state 330 | self.center_pos = np.array([cx, cy]) 331 | self.w = width 332 | self.h = height 333 | bbox = [cx - width / 2, cy - height / 2, width, height] 334 | best_score = score[best_idx] 335 | return {'bbox': bbox, 'best_score': best_score} 336 | -------------------------------------------------------------------------------- /YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python/main.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import sys 4 | from SiamRPNpp_tracker import SiamRPNppTracker 5 | 6 | sys.path.append("..") 7 | from YOLO_Detector_OpenCV_Tracker_Python.YOLO_detector import * 8 | 9 | # Main function 10 | if __name__== "__main__": 11 | 12 | is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda" 13 | 14 | yolo_detector = YoloDetector(YOLO_V8, is_cuda) 15 | 16 | tracker = None 17 | 18 | capture = cv2.VideoCapture("../Test_Video_Files/road.mp4") 19 | 20 | if not capture.isOpened(): 21 | print("Cannot open video file") 22 | sys.exit() 23 | 24 | ok, frame = capture.read() 25 | if not ok: 26 | print("Cannot read video file") 27 | sys.exit() 28 | 29 | # Initialize calculating FPS 30 | start = time.time_ns() 31 | frame_count = 0 32 | fps = -1 33 | 34 | # Set mouse click callback function 35 | object_clicked = False 36 | object_selected = False 37 | ox, oy = -1, -1 38 | selected_object_bounding_box = None 39 | 40 | def mouse_click(event, x, y, flags, param): 41 | global ox, oy, object_clicked, object_selected 42 | if event == cv2.EVENT_LBUTTONDOWN: 43 | ox, oy = x, y 44 | object_selected = False 45 | object_clicked = True 46 | 47 | cv2.namedWindow("Frame") 48 | cv2.setMouseCallback("Frame", mouse_click) 49 | 50 | while True: 51 | # Read a new frame 52 | ok, frame = capture.read() 53 | if not ok: 54 | break 55 | 56 | if frame is None: 57 | break 58 | 59 | frame_count += 1 60 | 61 | if object_clicked or object_selected: 62 | if object_clicked and not object_selected: 63 | selected_object_bounding_box = None 64 | 65 | tracker = SiamRPNppTracker(is_cuda) 66 | 67 | class_ids, class_names, confidences, boxes = yolo_detector.apply(frame) 68 | 69 | for (class_id, class_name, confidence, box) in zip(class_ids, class_names, confidences, boxes): 70 | x, y, w, h = box 71 | if ox >= x and ox <= x + w and oy >= y and oy <= y + h: 72 | tracker.init(frame, box) 73 | selected_object_bounding_box = box 74 | break 75 | 76 | object_selected = True 77 | object_clicked = False 78 | 79 | if 
object_selected: 80 | if not (selected_object_bounding_box is None): 81 | output = tracker.track(frame) 82 | selected_object_bounding_box = list(map(int, output['bbox'])) 83 | 84 | if not (selected_object_bounding_box is None): 85 | color = (0, 0, 255) 86 | label = "%s (%d%%)" % (class_name, int(confidence * 100)) 87 | 88 | box = selected_object_bounding_box 89 | cv2.rectangle(frame, box, color, 2) 90 | cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1) 91 | cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0)) 92 | 93 | if frame_count >= 30: 94 | end = time.time_ns() 95 | fps = 1000000000 * frame_count / (end - start) 96 | frame_count = 0 97 | start = time.time_ns() 98 | 99 | if fps > 0: 100 | fps_label = "FPS: %.2f" % fps 101 | cv2.putText(frame, fps_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) 102 | 103 | cv2.imshow("Frame", frame) 104 | 105 | key = cv2.waitKey(30) 106 | if key == 27: 107 | break 108 | 109 | capture.release() 110 | cv2.destroyAllWindows() 111 | -------------------------------------------------------------------------------- /YOLO_Detector_SiamRPN++_Tracker_OpenCV_Python/main_SiamRPNpp.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import time 4 | from SiamRPNpp_tracker import SiamRPNppTracker 5 | 6 | if __name__ == "__main__": 7 | 8 | is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda" 9 | 10 | # Open video file 11 | capture = cv2.VideoCapture("../Test_Video_Files/people.mp4") 12 | 13 | if not capture.isOpened(): 14 | print("Cannot open video file") 15 | sys.exit() 16 | 17 | ok, frame = capture.read() 18 | if not ok: 19 | print("Cannot read video file") 20 | sys.exit() 21 | 22 | # Initialize calculating FPS 23 | start = time.time_ns() 24 | frame_count = 0 25 | fps = -1 26 | 27 | # Define an initial bounding box 28 | bounding_box = (10, 10, 100, 100) 29 | 30 | # Select a bounding box 31 | bounding_box = cv2.selectROI(frame, False) 32 | 33 | # Create a OpenCV tracker and Initialize tracker with first frame and bounding box 34 | tracker = SiamRPNppTracker(is_cuda) 35 | tracker.init(frame, bounding_box) 36 | 37 | while True: 38 | # Read a new frame 39 | ok, frame = capture.read() 40 | if not ok: 41 | break 42 | 43 | # Increase frame count 44 | frame_count += 1 45 | 46 | # Update tracker 47 | output = tracker.track(frame) 48 | bounding_box = list(map(int, output['bbox'])) 49 | 50 | # Draw bounding box 51 | if ok: 52 | # Tracking success 53 | p1 = (int(bounding_box[0]), int(bounding_box[1])) 54 | p2 = (int(bounding_box[0] + bounding_box[2]), int(bounding_box[1] + bounding_box[3])) 55 | cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1) 56 | else: 57 | # Tracking failure 58 | cv2.putText(frame, "Tracking failure detected", (50, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2) 59 | 60 | # Calculate Frames per second (FPS) 61 | if frame_count >= 30: 62 | end = time.time_ns() 63 | fps = 1000000000 * frame_count / (end - start) 64 | frame_count = 0 65 | start = time.time_ns() 66 | 67 | # Display FPS on frame 68 | if fps > 0: 69 | fps_label = "FPS: %.2f" % fps 70 | cv2.putText(frame, fps_label, (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2) 71 | 72 | # Display result 73 | cv2.imshow("Tracking", frame) 74 | 75 | # Exit if ESC pressed 76 | key = cv2.waitKey(1) & 0xFF 77 | if key == 27: 78 | break 79 | --------------------------------------------------------------------------------
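Note: both `main.py` and `main_SiamRPNpp.py` read their CUDA switch from the first command-line argument (`is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda"`). A usage sketch, assuming the ONNX models have been unzipped as the README describes and, for the second form, an OpenCV build with CUDA DNN support:

```bash
# CPU backend (default)
python main_SiamRPNpp.py

# CUDA backend with the FP16 target (DNN_BACKEND_CUDA / DNN_TARGET_CUDA_FP16)
python main_SiamRPNpp.py cuda
```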