├── .gitignore ├── .style.yapf ├── LICENSE ├── README.md ├── examples ├── kitti_playback.py └── motchallenge_playback.py ├── pymotutils ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── linear_assignment.py │ ├── postprocessing.py │ └── preprocessing.py ├── application │ ├── __init__.py │ ├── application.py │ ├── dataset.py │ └── mono.py ├── contrib │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── detrac │ │ │ ├── __init__.py │ │ │ └── detrac_devkit.py │ │ ├── kitti │ │ │ ├── __init__.py │ │ │ └── kitti_devkit.py │ │ ├── motchallenge │ │ │ ├── __init__.py │ │ │ └── motchallenge_devkit.py │ │ └── pets2009 │ │ │ ├── __init__.py │ │ │ └── pets2009_devkit.py │ └── detection │ │ ├── __init__.py │ │ └── tensorflow_object_detection_api.py ├── io │ ├── __init__.py │ ├── detrac_io.py │ ├── motchallenge_io.py │ └── pymot_io.py └── visualization │ ├── __init__.py │ ├── opencv.py │ └── util.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style=pep8 3 | align_closing_bracket_with_visual_indent=True 4 | allow_multiline_lambdas=True 5 | allow_multiline_dictionary_keys=True 6 | allow_split_before_dict_value=True 7 | blank_line_before_nested_class_or_def=True 8 | #blank_line_before_module_docstring=False 9 | blank_line_before_class_docstring=False 10 | #blank_lines_around_top_level_definitions=True 11 | coalesce_brackets=True 12 | dedent_closing_brackets=False 13 | each_dict_entry_on_separate_line=True 14 | indent_dictionary_value=True 15 | join_multiple_lines=False 16 | no_spaces_around_selected_binary_operators=False 17 | spaces_around_default_or_named_assign=False 18 | spaces_around_power_operator=False 19 | 
space_between_ending_comma_and_closing_bracket=True 20 | split_arguments_when_comma_terminated=True 21 | split_before_bitwise_operator=False 22 | split_before_closing_bracket=False 23 | split_before_dict_set_generator=False 24 | split_before_expression_after_opening_paren=True 25 | split_before_first_argument=True 26 | split_before_logical_operator=False 27 | split_before_named_assigns=False 28 | split_complex_comprehension=False 29 | use_tabs=False 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. 
The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 
174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project} Copyright (C) {year} {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | .
668 | 
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | .
675 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pymotutils
2 | 
3 | A Python package that provides commonly used functionality when
4 | implementing and testing algorithms for multiple object tracking.
5 | This includes
6 | 
7 | * Preprocessing and postprocessing
8 | * Datasets and evaluation toolkits
9 | * Visualization
10 | 
11 | ## Installation
12 | 
13 | The library can be installed with pip:
14 | 
15 | ```
16 | pip install git+https://github.com/nwojke/pymotutils
17 | ```
18 | 
19 | Note that some of the dataset wrappers in ``contrib`` may require additional
20 | packages that are not listed as dependencies.
21 | 
22 | ## Example
23 | 
24 | The following example downloads the MOT16 dataset and plays back one of
25 | the training sequences:
26 | 
27 | ```
28 | wget https://motchallenge.net/data/MOT16.zip
29 | unzip MOT16.zip -d MOT16
30 | PYTHONPATH=$(pwd) python examples/motchallenge_playback.py \
31 |     --mot_dir=./MOT16/train --sequence=MOT16-02
32 | ```
33 | 
34 | A complete implementation of a tracking method using this utility package
35 | can be found in a separate [project](https://github.com/nwojke/mcf-tracker).
36 | 
--------------------------------------------------------------------------------
/examples/kitti_playback.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import argparse
3 | import pymotutils
4 | from pymotutils.contrib.datasets import kitti
5 | 
6 | 
7 | def parse_args():
8 |     """ Parse command line arguments.
9 |     """
10 |     parser = argparse.ArgumentParser(description="KITTI Dataset Playback")
11 |     parser.add_argument(
12 |         "--kitti_dir", help="Path to KITTI training/testing directory",
13 |         required=True)
14 |     parser.add_argument(
15 |         "--sequence", help="A four digit sequence number", required=True)
16 |     parser.add_argument(
17 |         "--min_confidence",
18 |         help="Detector confidence threshold. 
All detections with lower " 19 | "confidence are disregarded", type=float, default=None) 20 | return parser.parse_args() 21 | 22 | 23 | def main(): 24 | """Main program entry point.""" 25 | args = parse_args() 26 | 27 | devkit = kitti.Devkit(args.kitti_dir) 28 | data_source = devkit.create_data_source( 29 | args.sequence, kitti.OBJECT_CLASSES_PEDESTRIANS, 30 | min_confidence=args.min_confidence) 31 | 32 | visualization = pymotutils.MonoVisualization( 33 | update_ms=kitti.CAMERA_UPDATE_IN_MS, 34 | window_shape=kitti.CAMERA_IMAGE_SHAPE) 35 | application = pymotutils.Application(data_source) 36 | 37 | # First, play detections. Then, show ground truth tracks. 38 | application.play_detections(visualization) 39 | application.play_track_set(data_source.ground_truth, visualization) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /examples/motchallenge_playback.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import argparse 3 | import pymotutils 4 | from pymotutils.contrib.datasets import motchallenge 5 | 6 | TRAJECTORY_VISUALIZATION_LEN_IN_MSECS = 3000.0 7 | 8 | def parse_args(): 9 | """ Parse command line arguments. 10 | """ 11 | parser = argparse.ArgumentParser( 12 | description="MOTChallenge Dataset Playback") 13 | parser.add_argument( 14 | "--mot_dir", help="Path to MOTChallenge train/test directory", 15 | required=True) 16 | parser.add_argument( 17 | "--sequence", help="Name of the sequence to play", required=True) 18 | parser.add_argument( 19 | "--min_confidence", 20 | help="Detector confidence threshold. All detections with lower " 21 | "confidence are disregarded", type=float, default=None) 22 | return parser.parse_args() 23 | 24 | 25 | def main(): 26 | """Main program entry point.""" 27 | args = parse_args() 28 | 29 | devkit = motchallenge.Devkit(args.mot_dir) 30 | data_source = devkit.create_data_source(args.sequence, args.min_confidence) 31 | 32 | # Compute a suitable window shape. 33 | image_shape = data_source.peek_image_shape()[::-1] 34 | aspect_ratio = float(image_shape[0]) / image_shape[1] 35 | window_shape = int(aspect_ratio * 600), 600 36 | 37 | visualization = pymotutils.MonoVisualization( 38 | update_ms=data_source.update_ms, window_shape=window_shape) 39 | visualization.trajectory_visualization_len = int( 40 | TRAJECTORY_VISUALIZATION_LEN_IN_MSECS / data_source.update_ms) 41 | application = pymotutils.Application(data_source) 42 | 43 | # First, play detections. Then, show ground truth tracks. 
44 |     application.play_detections(visualization)
45 |     application.play_track_set(data_source.ground_truth, visualization)
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     main()
50 | 
--------------------------------------------------------------------------------
/pymotutils/__init__.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | from .algorithms import preprocessing
4 | from .algorithms import postprocessing
5 | from .algorithms import linear_assignment
6 | 
7 | from .application.application import *
8 | from .application.dataset import *
9 | 
10 | from .io import detrac_io
11 | from .io import motchallenge_io
12 | from .io import pymot_io
13 | 
14 | from .visualization.opencv import *
15 | from .visualization.util import *
16 | 
17 | from .application.mono import *
18 | 
--------------------------------------------------------------------------------
/pymotutils/algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | 
--------------------------------------------------------------------------------
/pymotutils/algorithms/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | """
3 | This module contains code for solving the linear assignment problem (bipartite
4 | matching) as well as some useful cost functions.
5 | """
6 | import numpy as np
7 | from sklearn.utils.linear_assignment_ import linear_assignment as la_solver
8 | 
9 | _INFTY_COST = 1e+5
10 | _EPS_COST = 1e-5
11 | 
12 | 
13 | def _intersection_over_union(roi, candidates):
14 |     """Compute intersection over union.
15 | 
16 |     Parameters
17 |     ----------
18 |     roi : ndarray
19 |         A bounding box in format (top left x, top left y, width, height).
20 |     candidates : ndarray
21 |         A matrix of candidate bounding boxes (one per row) in the same format
22 |         as roi.
23 | 
24 |     Returns
25 |     -------
26 |     ndarray
27 |         The intersection over union in [0, 1] between the given roi and each
28 |         candidate. A higher score means a larger overlap between the roi and
29 |         the candidate.
30 | 
31 |     """
32 |     roi_tl, roi_br = roi[:2], roi[:2] + roi[2:]
33 |     candidates_tl = candidates[:, :2]
34 |     candidates_br = candidates[:, :2] + candidates[:, 2:]
35 | 
36 |     tl = np.c_[np.maximum(roi_tl[0], candidates_tl[:, 0])[:, np.newaxis],
37 |                np.maximum(roi_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
38 |     br = np.c_[np.minimum(roi_br[0], candidates_br[:, 0])[:, np.newaxis],
39 |                np.minimum(roi_br[1], candidates_br[:, 1])[:, np.newaxis]]
40 |     wh = np.maximum(0., br - tl)
41 | 
42 |     area_intersection = wh.prod(axis=1)
43 |     area_roi = roi[2:].prod()
44 |     area_candidates = candidates[:, 2:].prod(axis=1)
45 |     return area_intersection / (area_roi + area_candidates - area_intersection)
46 | 
47 | 
48 | def intersection_over_union_cost(rois_a, rois_b):
49 |     """Compute an intersection-over-union cost matrix.
50 | 
51 |     Parameters
52 |     ----------
53 |     rois_a : ndarray
54 |         An Nx4 dimensional array of bounding boxes in format (top left x,
55 |         top left y, width, height).
56 |     rois_b : ndarray
57 |         An Mx4 dimensional array of bounding boxes in the same format.
58 | 
59 |     Returns
60 |     -------
61 |     ndarray
62 |         A cost matrix of shape NxM where element (i, j) contains `1 - iou`
63 |         between the i-th roi in rois_a and the j-th roi in rois_b (a larger
64 |         score means less bounding box overlap).
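
    Examples
    --------
    A minimal sketch of combining this cost matrix with `min_cost_matching`
    from this module; the box coordinates below are illustrative values, not
    taken from any dataset:

    >>> import numpy as np
    >>> rois_a = np.array([[0., 0., 10., 10.], [20., 20., 5., 5.]])
    >>> rois_b = np.array([[1., 1., 10., 10.]])
    >>> cost_matrix = intersection_over_union_cost(rois_a, rois_b)
    >>> cost_matrix.shape
    (2, 1)
    >>> matches, unmatched_a, unmatched_b = min_cost_matching(
    ...     cost_matrix, max_cost=0.7)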
65 | 66 | """ 67 | cost_matrix = np.zeros((len(rois_a), len(rois_b))) 68 | for i, roi in enumerate(rois_a): 69 | cost_matrix[i, :] = 1.0 - _intersection_over_union(roi, rois_b) 70 | return cost_matrix 71 | 72 | 73 | def pdist(a, b): 74 | """Compute pair-wise squared distance between a and b. 75 | 76 | Parameters 77 | ---------- 78 | a : array_like 79 | A first matrix of row-vectors 80 | b : array_like 81 | A second matrix of row-vectors 82 | 83 | Returns 84 | ------- 85 | ndarray 86 | This functions returns the pair-wise squared distance between points 87 | in a and b. If there are N elements in a and M elements in b, the 88 | function returns a matrix of size NxM, such that element (i, j) returns 89 | the squared distance between a[i] and b[j]. 90 | 91 | """ 92 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 93 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 94 | r2 = np.clip(r2, 0., float(np.inf)) 95 | return r2 96 | 97 | 98 | def min_cost_matching(cost_matrix, max_cost=None): 99 | """Solve a linear assignment problem. 100 | 101 | Parameters 102 | ---------- 103 | cost_matrix : ndarray 104 | An NxM matrix where element (i,j) contains the cost of matching 105 | the i-th element out of the first set of N elements to the j-th element 106 | out of the second set of M elements. 107 | max_cost: float 108 | Gating threshold. Associations with cost larger than this value are 109 | disregarded. 110 | 111 | Returns 112 | ------- 113 | (ndarray, ndarray, ndarray) 114 | Returns a tuple with the following three entries: 115 | * An array of shape Lx2 of matched elements (row index, column index). 116 | * An array of unmatched row indices. 117 | * An array of unmatched column indices. 118 | 119 | """ 120 | if max_cost is not None: 121 | cost_matrix[cost_matrix > max_cost] = max_cost + _EPS_COST 122 | matched_indices = la_solver(cost_matrix) 123 | if max_cost is not None: 124 | row_indices, col_indices = matched_indices[:, 0], matched_indices[:, 1] 125 | mask = cost_matrix[row_indices, col_indices] <= max_cost 126 | matched_indices = matched_indices[mask, :] 127 | 128 | # TODO(nwojke): I think there is a numpy function for the set difference 129 | # that is computed here (it might also be possible to speed this up if 130 | # sklearn preserves the order of indices, which it does?). 131 | unmatched_a = np.array( 132 | list(set(range((cost_matrix.shape[0]))) - set(matched_indices[:, 0]))) 133 | unmatched_b = np.array( 134 | list(set(range((cost_matrix.shape[1]))) - set(matched_indices[:, 1]))) 135 | 136 | return matched_indices, unmatched_a, unmatched_b 137 | -------------------------------------------------------------------------------- /pymotutils/algorithms/postprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains functionality to postprocess the tracking output. 4 | """ 5 | import numpy as np 6 | import pymotutils 7 | 8 | 9 | def convert_track_set(track_set, detection_converter): 10 | """Create a track set copy with modified detections. 11 | 12 | Using this function, you can modify the detections contained in a track set 13 | using a user-specified converter function. 
14 | 15 | For example, you may wish to set the sensor_data attribute to an application 16 | specific field 'roi' in order to interpolate this data: 17 | 18 | >>> track_set = create_my_tracking_hypotheses() 19 | >>> dataset_converter = lambda d: Detection(d.frame_idx, d.roi) 20 | >>> roi_data = convert_track_set(track_set, dataset_converter) 21 | >>> interpolated = interpolate_track_set(roi_data) 22 | 23 | Parameters 24 | ---------- 25 | track_set : TrackSet 26 | The input track set. 27 | detection_converter : Callable[Detection] -> Detection 28 | The converter function. This is called once for each detection contained 29 | in the track set. 30 | 31 | Returns 32 | ------- 33 | TrackSet 34 | Returns the converted track set with the same structure as the input 35 | track set, but where each detection has been converted. 36 | 37 | """ 38 | result = pymotutils.TrackSet() 39 | for tag, track in track_set.tracks.items(): 40 | result_track = result.create_track(tag) 41 | for detection in track.detections.values(): 42 | result_track.add(detection_converter(detection)) 43 | return result 44 | 45 | 46 | def interpolate_track_set(track_set): 47 | """Interpolate sensor data in given track set. 48 | 49 | This method uses linear interpolation to fill missing detections in each 50 | track of the given track set. Each dimension of the sensor data is 51 | interpolated independently of all others. 52 | 53 | For example, if the sensor data contains 3-D positions, then the X, Y, and Z 54 | coordinates of the trajectory are interpolated linearily. The same method 55 | works fairly well for image regions as well. 56 | 57 | Parameters 58 | ---------- 59 | track_set : TrackSet 60 | The track set to be interpolated. The sensor data must be an array_like 61 | (ndim=1). 62 | 63 | Returns 64 | ------- 65 | TrackSet 66 | The interpolated track set, where each target is visible from the first 67 | frame of appearance until leaving the scene. 68 | 69 | """ 70 | interp_set = pymotutils.TrackSet() 71 | for tag, track in track_set.tracks.items(): 72 | first, last = track.first_frame_idx(), track.last_frame_idx() 73 | frame_range = np.arange(first, last + 1) 74 | xp = sorted(list(track.detections.keys())) 75 | 76 | if len(xp) == 0: 77 | continue # This is an empty trajectory. 78 | 79 | sensor_data = np.asarray([track.detections[i].sensor_data for i in xp]) 80 | fps = [sensor_data[:, i] for i in range(sensor_data.shape[1])] 81 | interps = [np.interp(frame_range, xp, fp) for fp in fps] 82 | 83 | itrack = interp_set.create_track(tag) 84 | for i, frame_idx in enumerate(frame_range): 85 | sensor_data = np.array([interp[i] for interp in interps]) 86 | do_not_care = frame_idx not in track.detections 87 | itrack.add( 88 | pymotutils.Detection(frame_idx, sensor_data, do_not_care)) 89 | return interp_set 90 | -------------------------------------------------------------------------------- /pymotutils/algorithms/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains functionality to preprocess detections. 4 | """ 5 | import numpy as np 6 | 7 | 8 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 9 | """Suppress overlapping detections. 10 | 11 | Original code from [1]_ has been adapted to include confidence score. 12 | 13 | .. 
[1] http://www.pyimagesearch.com/2015/02/16/ 14 | faster-non-maximum-suppression-python/ 15 | 16 | Examples 17 | -------- 18 | 19 | >>> boxes = [d.roi for d in detections] 20 | >>> scores = [d.confidence for d in detections] 21 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 22 | >>> detections = [detections[i] for i in indices] 23 | 24 | Parameters 25 | ---------- 26 | boxes : ndarray 27 | Array of ROIs (x, y, width, height). 28 | max_bbox_overlap : float 29 | ROIs that overlap more than this values are suppressed. 30 | scores : Optional[array_like] 31 | Detector confidence score. 32 | 33 | Returns 34 | ------- 35 | ndarray 36 | Returns indices of detections that have survived non-maxima suppression. 37 | 38 | """ 39 | if len(boxes) == 0: 40 | return [] 41 | 42 | boxes = boxes.astype(np.float) 43 | pick = [] 44 | 45 | x1 = boxes[:, 0] 46 | y1 = boxes[:, 1] 47 | x2 = boxes[:, 2] + boxes[:, 0] 48 | y2 = boxes[:, 3] + boxes[:, 1] 49 | 50 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 51 | if scores is not None: 52 | idxs = np.argsort(scores) 53 | else: 54 | idxs = np.argsort(y2) 55 | 56 | while len(idxs) > 0: 57 | last = len(idxs) - 1 58 | i = idxs[last] 59 | pick.append(i) 60 | 61 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 62 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 63 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 64 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 65 | 66 | w = np.maximum(0, xx2 - xx1 + 1) 67 | h = np.maximum(0, yy2 - yy1 + 1) 68 | 69 | overlap = (w * h) / area[idxs[:last]] 70 | 71 | idxs = np.delete( 72 | idxs, 73 | np.concatenate(([last], np.where(overlap > max_bbox_overlap)[0]))) 74 | 75 | return np.asarray(pick) 76 | 77 | 78 | def filter_detections( 79 | detection_dict, min_confidence=None, min_width=None, min_height=None, 80 | max_width=None, max_height=None): 81 | """Filter detections by detector confidencen and extent. 82 | 83 | Parameters 84 | ---------- 85 | detection_dict : Dict[int, List[Detection]] 86 | A dictionary that maps from frame index to a list of detections. 87 | min_confidence : Optional[float] 88 | If not None, discards detections with confidence lower than this 89 | value. 90 | min_width : Optional[float] 91 | If not None, discards detections with width lower than this value. 92 | min_height : Optional[float] 93 | If not None, discards detections with height lower than this value. 94 | max_width: Optional[float] 95 | If not None, discards detections with width lower than this value. 96 | max_height: Optional[float] 97 | If not None, discards detections with height lower than this value. 98 | 99 | Returns 100 | ------- 101 | Dict[int, List[Detection]] 102 | Returns the dictionary of filtered detections. 
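    For example, a minimal sketch (the thresholds are illustrative) that keeps
    only reasonably confident detections of plausible size:

    >>> detection_dict = filter_detections(
    ...     detection_dict, min_confidence=0.3, min_width=20, min_height=40)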
103 | 104 | """ 105 | if min_width is None: 106 | min_width = -np.inf 107 | if min_height is None: 108 | min_height = -np.inf 109 | if max_width is None: 110 | max_width = np.inf 111 | if max_height is None: 112 | max_height = np.inf 113 | 114 | def filter_fn(detection): 115 | if min_confidence is not None and detection.confidence < min_confidence: 116 | return False 117 | if detection.roi[2] < min_width or detection.roi[2] > max_width: 118 | return False 119 | if detection.roi[3] < min_height or detection.roi[3] > max_height: 120 | return False 121 | return True 122 | 123 | filtered_detection_dict = {} 124 | for frame_idx in detection_dict.keys(): 125 | filtered_detection_dict[frame_idx] = list( 126 | filter(filter_fn, detection_dict[frame_idx])) 127 | return filtered_detection_dict 128 | -------------------------------------------------------------------------------- /pymotutils/application/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nwojke/pymotutils/e702a76f552b641d50bab7d3ee48292650796d49/pymotutils/application/__init__.py -------------------------------------------------------------------------------- /pymotutils/application/application.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains base classes for tracking applications that allow for 4 | easy integration of new datasets and tracking algorithms. The application 5 | structure is divided into data acquisition, tracking, visualization, and 6 | application glue code. 7 | 8 | The modules application base class performs much of the often replicated 9 | functionality for running state estimation, data association, and evaluation. 10 | The module also contains abstract base classes that define the interface for 11 | data acquisition, tracking, and visualization. 12 | """ 13 | import six 14 | import abc 15 | import time 16 | import pymotutils 17 | 18 | 19 | @six.add_metaclass(abc.ABCMeta) 20 | class DataSource(object): 21 | """ 22 | This is an abstract base class that defines the interface between any data 23 | sources, e.g., a public dataset, and the application base class provided by 24 | this module. 25 | 26 | Attributes 27 | ---------- 28 | """ 29 | 30 | @abc.abstractmethod 31 | def first_frame_idx(self): 32 | """Get index of the first frame (usually 0). 33 | 34 | Returns 35 | ------- 36 | int 37 | Index of the first frame. 38 | 39 | """ 40 | raise NotImplementedError("abstract base class") 41 | 42 | @abc.abstractmethod 43 | def last_frame_idx(self): 44 | """Get index of the last frame or None, if data source has no defined 45 | end. 46 | 47 | Returns 48 | ------- 49 | int | NoneType 50 | Index of the last frame or None. 51 | 52 | """ 53 | raise NotImplementedError("abstract base class") 54 | 55 | def num_frames(self): 56 | """Get number of frames in the data source or None, if there is no 57 | defined end. 58 | 59 | Returns 60 | ------- 61 | int 62 | Number of frames in the data source or None. 63 | 64 | """ 65 | if self.last_frame_idx() is None: 66 | return None 67 | else: 68 | return self.last_frame_idx() - self.first_frame_idx() + 1 69 | 70 | @abc.abstractmethod 71 | def read_frame_data(self, frame_idx): 72 | """Read a given frame into memory. 73 | 74 | This method is called by the application base class to read data of a 75 | particular frame into memory. 
The data is returned as a dictionary 76 | that must contain all data that is necessary for visualization 77 | of tracking results (see :class:`Visualization`) and at least the 78 | following:: 79 | 80 | * "detections": The set of detections at the given time step. This is 81 | passed on to the tracker. 82 | * "timestamp": The timestamp of the given frame. This is used to 83 | configure the trackers motion model. 84 | 85 | Optionally, the frame data may contain:: 86 | 87 | * "sensor_pose": sensor pose at the current time step, this is 88 | passed on to the tracker. By convention, this should be an affine 89 | transformation matrix. 90 | * "ground_truth": ground truth data over the entire sequence, i.e., a 91 | :class:`TrackSet` that contains the multi-target ground truth 92 | trajectory of the entire sequence. This item should contain the full 93 | track set (over the entire sequence) for all given frame_idx. 94 | 95 | By convention, we currently use the following attribute names for 96 | visualization-dependent data:: 97 | 98 | * "bgr_image": a single color image in BGR color space 99 | * "bgr_images": color images of multiple sensors, all in BGR color space 100 | * "disparity_image": a single disparity image 101 | * "disparity_images": disparity images of multiple sensors 102 | 103 | Parameters 104 | ---------- 105 | frame_idx : int 106 | The index of the frame to load. 107 | 108 | Returns 109 | ------- 110 | Dict[str, T] 111 | This method returns a dictionary of frame-dependent data, such 112 | as timestamps, detections, etc. See description above. 113 | 114 | """ 115 | raise NotImplementedError("abstract base class") 116 | 117 | 118 | @six.add_metaclass(abc.ABCMeta) 119 | class Visualization(object): 120 | """ 121 | This is an abstract class that defines the interface between the modules 122 | application base class and visualization of tracking results. 123 | 124 | During visualization, the control flow is handed over from the application 125 | to the visualization object. Therefore, every concrete implementation of 126 | this class must provide a control loop that iterates over the entire 127 | sequence of data. 128 | """ 129 | 130 | @abc.abstractmethod 131 | def run(self, start_idx, end_idx, frame_callback): 132 | """Run visualization between a given range of frames. 133 | 134 | This method is called by the application base class. At this point, the 135 | application hands over control to the visualization, which is expected 136 | to call the given callback at each frame. From within the callback, the 137 | application will call the visualization routines declared in this 138 | class. 139 | 140 | Parameters 141 | ---------- 142 | start_idx : int 143 | The index of the first frame to show. 144 | end_idx : Optional[int] 145 | One plus the index of the last frame to show. If None given, the 146 | visualization should run forever or until the user has requested 147 | to terminate. 148 | frame_callback : Callable[int] -> None 149 | A callback that must be invoked at each frame from start_idx to 150 | end_idx - 1. As argument, the index of the current frame should 151 | be passed. 152 | 153 | """ 154 | raise NotImplementedError("abstract base class") 155 | 156 | @abc.abstractmethod 157 | def init_frame(self, frame_data): 158 | """Initialize visualization routines. 159 | 160 | This method is called once at the beginning of each frame. 161 | 162 | .. see: `class:`DataSource` 163 | 164 | Parameters 165 | ---------- 166 | frame_data : Dict[str, T] 167 | The dictionary of frame-dependent data. 
See :class:`DataSource` 168 | for more information. 169 | 170 | """ 171 | raise NotImplementedError("abstract base class") 172 | 173 | @abc.abstractmethod 174 | def finalize_frame(self): 175 | """Finalize visualization routines. 176 | 177 | This method is called once at the end of each frame. 178 | 179 | """ 180 | raise NotImplementedError("abstract base class") 181 | 182 | @abc.abstractmethod 183 | def draw_detections(self, detections): 184 | """Draw detections at the current time step. 185 | 186 | Parameters 187 | ---------- 188 | detections : List[Detection] 189 | The set of detections at the current time step. The concrete type 190 | is application specific. 191 | 192 | """ 193 | raise NotImplementedError("abstract base class") 194 | 195 | @abc.abstractmethod 196 | def draw_online_tracking_output(self, tracker): 197 | """Draw online tracking results. 198 | 199 | Called once every frame after all processing has been done. 200 | 201 | Parameters 202 | ---------- 203 | tracker : Tracker 204 | The multi-target tracker. 205 | 206 | """ 207 | raise NotImplementedError("abstract base class") 208 | 209 | @abc.abstractmethod 210 | def draw_track_set(self, frame_idx, track_set): 211 | """Draw a set of tracks at the current time step. 212 | 213 | Parameters 214 | ---------- 215 | frame_idx : int 216 | Index of the current frame that should be visualized. 217 | track_set : TrackSet 218 | The set of tracks to visualize. The concrete type of sensor data 219 | that is contained in the track set is application dependent. 220 | 221 | """ 222 | raise NotImplementedError("abstract base class") 223 | 224 | 225 | class NoVisualization(Visualization): 226 | """ 227 | A simple visualization object that loops through the sequence without 228 | showing any results. 229 | """ 230 | 231 | def run(self, start_idx, end_idx, frame_callback): 232 | """Initiate control loop for the given number of frames. 233 | 234 | Parameters 235 | ---------- 236 | start_idx : int 237 | Index of the first frame to process. 238 | end_idx : Optional[int] 239 | One plus the index of the last frame to process. If None given, 240 | control is executed in an endless loop. 241 | frame_callback : Callable[int] -> None 242 | A callable that is invoked at each frame. As argument, the index 243 | of the current frame is passed. 244 | 245 | """ 246 | for frame_idx in range(start_idx, end_idx): 247 | print("Frame index: %d / %d" % (frame_idx, end_idx)) 248 | frame_callback(frame_idx) 249 | 250 | def init_frame(self, frame_data): 251 | pass 252 | 253 | def finalize_frame(self): 254 | pass 255 | 256 | def draw_detections(self, detections): 257 | pass 258 | 259 | def draw_online_tracking_output(self, tracker): 260 | pass 261 | 262 | def draw_track_set(self, frame_idx, track_set): 263 | pass 264 | 265 | 266 | @six.add_metaclass(abc.ABCMeta) 267 | class Tracker(object): 268 | """ 269 | This is the abstract base class of tracking algorithms. The class defines 270 | a common interface that is enforced by the application base class. 271 | 272 | You can assume that data is processed sequentially, one frame at a time. 273 | 274 | """ 275 | 276 | @abc.abstractmethod 277 | def reset(self, start_idx, end_idx): 278 | """Reset the tracker. 279 | 280 | This method is called once before processing the data to inform the 281 | tracker that a new sequence will be processed. 282 | 283 | Parameters 284 | ---------- 285 | start_idx : int 286 | Index of the first frame of upcoming sequence. 
287 | end_idx : Optional[int] 288 | One plus index of the last frame of the upcoming sequence, or None 289 | if there is no predefined end. 290 | 291 | """ 292 | raise NotImplementedError("abstract base class") 293 | 294 | @abc.abstractmethod 295 | def process_frame(self, frame_data): 296 | """Process incoming detections of a new frame. 297 | 298 | This method is called once for every frame, sequentially. 299 | 300 | Parameters 301 | ---------- 302 | frame_data : Dict[str, T] 303 | The dictionary of frame-dependent data. See :class:`DataSource` 304 | for more information. Must include a `timestamp`, a list of 305 | `detections` and optionally `sensor_pose`. 306 | 307 | Returns 308 | ------- 309 | Dict[int, tracking.track_hypothesis.TrackHypothesis] | NoneType 310 | Returns a dictionary that maps from track identity to track 311 | hypothesis, or None if identities cannot be resolved online. 312 | 313 | """ 314 | raise NotImplementedError("abstract base class") 315 | 316 | @abc.abstractmethod 317 | def compute_trajectories(self): 318 | """Compute trajectories. 319 | 320 | This method is called once at the end of the sequence to obtain 321 | target trajectories. 322 | 323 | Parameters 324 | ---------- 325 | 326 | Returns 327 | ------- 328 | List[List[Detection]] 329 | A list of target trajectories, where each target trajectory is 330 | a list of detections that belong to the same object. 331 | 332 | The sensor_data field in each detection should be compatible with 333 | the concrete implementation of track set visualization. 334 | 335 | """ 336 | raise NotImplementedError("abstract base class") 337 | 338 | 339 | class Application(object): 340 | """ 341 | This is the application base class that provides functionality for running 342 | a (multi-)target tracker on a particular data source and for evaluation 343 | against ground truth data. 344 | 345 | Parameters 346 | ---------- 347 | data_source : DataSource 348 | The concrete data source of the experiment. 349 | 350 | Attributes 351 | ---------- 352 | data_source : DataSource 353 | The data source of this application. 354 | hypotheses : TrackSet 355 | Track hypotheses recorded during last execution. 356 | ground_truth : TrackSet 357 | Ground truth tracks recorded during last execution. 358 | 359 | """ 360 | 361 | def __init__(self, data_source): 362 | assert isinstance( 363 | data_source, DataSource), "data_source is of wrong type" 364 | self.data_source = data_source 365 | self.hypotheses = pymotutils.TrackSet() 366 | self.ground_truth = pymotutils.TrackSet() 367 | 368 | self._visualization = None 369 | self._playback_trackset = pymotutils.TrackSet() 370 | self._tracker = None 371 | self._prev_timestep = None 372 | 373 | def play_track_set( 374 | self, track_set, visualization, start_idx=None, end_idx=None): 375 | """Loop through dataset and visualize a given track set. 376 | 377 | This method calls visualization routines for drawing the detections 378 | contained in the given track set. 379 | 380 | Parameters 381 | ---------- 382 | track_set : TrackSet 383 | The set of tracks to visualize. 384 | visualization : Visualization 385 | A concrete implementation of Visualization that draws the track set. 386 | start_idx : Optional[int] 387 | Index of the first frame. Defaults to 0. 388 | end_idx : Optional[int] 389 | One plus index of the last frame. Defaults to the number of frames 390 | in the data source. 
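        For example, a minimal sketch where `my_track_set` and
        `my_visualization` are placeholders for application-specific objects:

        >>> app = Application(my_data_source)
        >>> app.play_track_set(my_track_set, my_visualization)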
391 | 392 | """ 393 | assert isinstance( 394 | visualization, Visualization), "visualization is of wrong type" 395 | start_idx = ( 396 | start_idx 397 | if start_idx is not None else self.data_source.first_frame_idx()) 398 | 399 | source_end_idx = ( 400 | self.data_source.last_frame_idx() + 1 401 | if self.data_source.last_frame_idx() is not None else None) 402 | end_idx = (end_idx if end_idx is not None else source_end_idx) 403 | 404 | self._visualization = visualization 405 | self._playback_trackset = track_set 406 | visualization.run(start_idx, end_idx, self._next_frame_playback) 407 | 408 | def play_groundtruth(self, visualization, start_idx=None, end_idx=None): 409 | """Play ground truth. 410 | 411 | This method visualizes the ground truth data that has been collected 412 | during the last run of process_data. If process_data has not been 413 | executed before calling this function, the ground truth will be empty. 414 | 415 | If you want to play the full ground truth data without evaluating 416 | a tracker, you can call play_track_set on data contained in the 417 | data source:: 418 | 419 | >>> ground_truth = my_data_source.read_frame_data(0)["ground_truth"] 420 | >>> app = Application(my_data_source) 421 | >>> app.play_track_set(ground_truth) 422 | 423 | This works, because by convention the ground_truth returned for a 424 | particular frame always contains the etire track set. 425 | 426 | Parameters 427 | ---------- 428 | visualization : Visualization 429 | A concrete implementation of Visualization that draws the track set. 430 | start_idx : Optional[int] 431 | Index of the first frame. Defaults to 0. 432 | end_idx : Optional[int] 433 | One plus index of the last frame. Defaults to the number of frames 434 | in the data source. 435 | 436 | """ 437 | self.play_track_set( 438 | self.ground_truth, visualization, start_idx, end_idx) 439 | 440 | def play_hypotheses(self, visualization, start_idx=None, end_idx=None): 441 | """Play tracking results. 442 | 443 | This method visualizes the tracking results that has been collected 444 | during the last run of process_data. If process_data has not been 445 | executed before calling this function, the tracking results will be 446 | empty. 447 | 448 | Parameters 449 | ---------- 450 | visualization : Visualization 451 | A concrete implementation of Visualization that draws the track set. 452 | start_idx : Optional[int] 453 | Index of the first frame. Defaults to 0. 454 | end_idx : Optional[int] 455 | One plus index of the last frame. Defaults to the number of frames 456 | in the data source. 457 | 458 | """ 459 | self.play_track_set(self.hypotheses, visualization, start_idx, end_idx) 460 | 461 | def _next_frame_playback(self, frame_idx): 462 | frame_data = self.data_source.read_frame_data(frame_idx) 463 | self._visualization.init_frame(frame_data) 464 | self._visualization.draw_track_set(frame_idx, self._playback_trackset) 465 | self._visualization.finalize_frame() 466 | 467 | def play_detections(self, visualization, start_idx=None, end_idx=None): 468 | """Show detections. 469 | 470 | Parameters 471 | ---------- 472 | visualization : Visualization 473 | A concrete implementation of Visualization that draws the 474 | detections. 475 | start_idx : Optional[int] 476 | Index of the first frame. Defaults to 0. 477 | end_idx 478 | One plus index of the last frame. Defauls to the number of frames 479 | in the data source. 
480 | 481 | """ 482 | assert isinstance( 483 | visualization, Visualization), "visualization is of wrong type" 484 | start_idx = ( 485 | start_idx 486 | if start_idx is not None else self.data_source.first_frame_idx()) 487 | 488 | source_end_idx = ( 489 | self.data_source.last_frame_idx() + 1 490 | if self.data_source.last_frame_idx() is not None else None) 491 | end_idx = (end_idx if end_idx is not None else source_end_idx) 492 | 493 | self._visualization = visualization 494 | visualization.run(start_idx, end_idx, self._next_frame_detections) 495 | 496 | def _next_frame_detections(self, frame_idx): 497 | frame_data = self.data_source.read_frame_data(frame_idx) 498 | self._visualization.init_frame(frame_data) 499 | self._visualization.draw_detections(frame_data["detections"]) 500 | self._visualization.finalize_frame() 501 | 502 | def process_data( 503 | self, tracker, visualization=None, start_idx=None, end_idx=None): 504 | """Process a batch of frames. 505 | 506 | This method runs the given tracker on a sequence of data and collects 507 | the ground truth detections contained within. 508 | 509 | Parameters 510 | ---------- 511 | tracker : Tracker 512 | A concrete tracking implementation. 513 | visualization : Visualization 514 | A concrete implementation of Visualization that draws detections 515 | and state estimates. 516 | start_idx : Optional[int] 517 | Index of the first frame. Defaults to 0. 518 | end_idx : Optional[int] 519 | One plus index of the last frame. Defaults to the number of frames 520 | in the data source. 521 | 522 | """ 523 | if visualization is None: 524 | visualization = NoVisualization() 525 | assert isinstance( 526 | visualization, Visualization), "visualization is of wrong type" 527 | assert isinstance(tracker, Tracker), "tracker is of wrong type" 528 | 529 | start_idx = ( 530 | start_idx 531 | if start_idx is not None else self.data_source.first_frame_idx()) 532 | end_idx = ( 533 | end_idx 534 | if end_idx is not None else self.data_source.last_frame_idx() + 1) 535 | 536 | self._visualization = visualization 537 | self.ground_truth = pymotutils.TrackSet() 538 | self.hypotheses = pymotutils.TrackSet() 539 | self._tracker = tracker 540 | 541 | self._tracker.reset(start_idx, end_idx) 542 | self._visualization.run( 543 | start_idx, end_idx, self._next_frame_process_data) 544 | 545 | def _next_frame_process_data(self, frame_idx): 546 | frame_data = self.data_source.read_frame_data(frame_idx) 547 | detections = frame_data["detections"] 548 | 549 | t0 = time.time() 550 | self._tracker.process_frame(frame_data) 551 | t1 = time.time() 552 | print("Processing time for this frame:", 1e3 * (t1 - t0), "ms") 553 | 554 | self._visualization.init_frame(frame_data) 555 | self._visualization.draw_detections(detections) 556 | self._visualization.draw_online_tracking_output(self._tracker) 557 | self._visualization.finalize_frame() 558 | 559 | def compute_trajectories(self, interpolation, detection_converter=None): 560 | """Compute trajectories on the sequence of data that has previously been 561 | processed. 562 | 563 | You must call process_data before computing trajectories, otherwise 564 | this method will fail. 565 | 566 | Optionally, you can pass in a function for interpolating tracking 567 | results. Prior to interpolation, you may convert tracking results 568 | using a user-specified function. 
569 | 570 | See 571 | ---- 572 | interpolate_track_set 573 | convert_track_set 574 | 575 | Parameters 576 | ---------- 577 | interpolation : bool | Callable[TrackSet] -> TrackSet 578 | If True, track hypotheses and ground truth data will be 579 | interpolated (i.e., missed detections will be filled). 580 | If False, track hypotheses and ground truth will not be 581 | interpolated. 582 | Alternatively, you can pass in a function to use for track set 583 | interpolation. 584 | detection_converter : Optional[Callable[Detection] -> Detection] 585 | A converter function that is called once for each detection in order 586 | to convert tracking results to a format that is suitable for 587 | evaluation. This function is called prior to interpolation. 588 | 589 | Returns 590 | ------- 591 | List[List[Detection]] 592 | Returns a list of trajectories, where each trajectory is a 593 | sequence of detections that belong to the same object. 594 | 595 | """ 596 | trajectories = self._tracker.compute_trajectories() 597 | self.hypotheses = pymotutils.TrackSet() 598 | for i, trajectory in enumerate(trajectories): 599 | track = self.hypotheses.create_track(i) 600 | for detection in trajectory: 601 | track.add(detection) 602 | 603 | if detection_converter is not None: 604 | self.hypotheses = pymotutils.postprocessing.convert_track_set( 605 | self.hypotheses, detection_converter) 606 | 607 | if not isinstance(interpolation, bool): 608 | self.ground_truth = interpolation(self.ground_truth) 609 | self.hypotheses = interpolation(self.hypotheses) 610 | elif interpolation: 611 | interpolation = pymotutils.postprocessing.interpolate_track_set 612 | self.ground_truth = interpolation(self.ground_truth) 613 | self.hypotheses = interpolation(self.hypotheses) 614 | 615 | return trajectories 616 | -------------------------------------------------------------------------------- /pymotutils/application/dataset.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | The data structures in this module may be used to store detections, ground 4 | truth data, and tracking hypotheses for a particular dataset. 5 | """ 6 | import numpy as np 7 | import pymotutils 8 | 9 | 10 | class Detection(object): 11 | """This is a container class for object detections and ground truth data. 12 | 13 | Parameters 14 | ---------- 15 | frame_idx : int 16 | Index of the frame at which the detection occured 17 | sensor_data : array_like 18 | Sensor data (application dependent). Could be, e.g., a point 19 | measurement or a region of interest in an image. The sensor data 20 | must be representable as vector of floats. If you have other 21 | application data (e.g., for visualization), you can pass them 22 | through kwargs. 23 | do_not_care : bool 24 | This flag indicates whether this detection should be included 25 | in evaluation. 26 | If True, missing this detection will be counted as false negative. If 27 | False, missing this detection will not have a negative impact on 28 | tracking performance. Therefore, this flag can be used to mark hard to 29 | detect objects (such as full occlusions) in ground truth. 30 | kwargs : Optional[Dict[str, T]] 31 | Optional keyname arguments that will be accissible as attributes of 32 | this class. 33 | 34 | Attributes 35 | ---------- 36 | frame_idx : int 37 | Index of the frame at which the detection occured. 38 | sensor_data : ndarray 39 | Sensor data (application dependent). 
Could be, e.g., a point 40 | measurement or a region of interest in an image. 41 | do_not_care : bool 42 | This flag indicates whether this detection is should be included 43 | in evaluation. 44 | If True, missing this detection will be counted as false negative. 45 | If False, missing this detection will not have an impact on tracking 46 | performance. 47 | Therefore, this flag can be used to mark hard to detect objects (such 48 | as full occlusions) in ground truth. 49 | 50 | """ 51 | 52 | def __init__(self, frame_idx, sensor_data, do_not_care=False, **kwargs): 53 | self.frame_idx = frame_idx 54 | self.sensor_data = np.asarray(sensor_data) 55 | self.do_not_care = do_not_care 56 | for key, attr in kwargs.items(): 57 | setattr(self, key, attr) 58 | 59 | 60 | class Track(object): 61 | """A track is an object identity that appears within the dataset. 62 | 63 | Parameters 64 | ---------- 65 | detections : Optional[Dict[int, Detection]] 66 | A dictionary of detections. The key is the frame index at which the 67 | detection occured. The value is the detection object itself. 68 | If None, an empty dictionary is created. 69 | 70 | Attributes 71 | ---------- 72 | detections : Dict[int, Detection] 73 | A dictionary of detections. The key is the frame index at which the 74 | detection occured, the value is the detection object itself. 75 | You may directly modify this attribute. 76 | 77 | """ 78 | 79 | def __init__(self, detections=None): 80 | if detections is None: 81 | detections = {} 82 | self.detections = detections 83 | 84 | def add(self, detection): 85 | """Add a detection to the track. 86 | 87 | Parameters 88 | ---------- 89 | detection : Detection 90 | The detection to add. 91 | 92 | """ 93 | assert isinstance(detection, Detection), "Detection is of wrong type" 94 | assert detection.frame_idx not in self.detections, "duplicate frame_idx" 95 | self.detections[detection.frame_idx] = detection 96 | 97 | def first_frame_idx(self): 98 | """Get index of the first frame at which the object appears. 99 | 100 | Returns 101 | ------- 102 | int 103 | Index of the first frame at which the object appears. 104 | 105 | """ 106 | return 0 if len(self.detections) == 0 else min(self.detections.keys()) 107 | 108 | def last_frame_idx(self): 109 | """Get index of the last frame at which the object is present. 110 | 111 | Returns 112 | ------- 113 | int 114 | Index of the last frame at which the object is present. 115 | 116 | """ 117 | return 0 if len(self.detections) == 0 else max(self.detections.keys()) 118 | 119 | def num_frames(self): 120 | """Get the total number of frames this object appears in, including 121 | occlusions. 122 | 123 | Returns 124 | ------- 125 | int 126 | self.last_frame_idx() - self.first_frame_idx() + 1 127 | 128 | """ 129 | return self.last_frame_idx() - self.first_frame_idx() + 1 130 | 131 | def frame_range(self): 132 | """ Get range from first frame of appearance to last frame of presence. 133 | 134 | Returns 135 | ------- 136 | range 137 | >>> range(self.first_frame_idx(), self.last_frame_idx() + 1) 138 | 139 | """ 140 | start, end = self.first_frame_idx(), self.last_frame_idx() + 1 141 | return range(start, end) 142 | 143 | def is_in_frame(self, frame_idx): 144 | """Test whether the object has been detected in a given frame. 145 | 146 | Parameters 147 | ---------- 148 | frame_idx : int 149 | Index of the frame to test against. 150 | 151 | Returns 152 | ------- 153 | bool 154 | True if the object is present at the given frame. 
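        For example (a minimal doctest):

        >>> track = Track()
        >>> track.add(Detection(frame_idx=7, sensor_data=[0., 0.]))
        >>> track.is_in_frame(7)
        True
        >>> track.is_in_frame(8)
        False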
155 | 156 | """ 157 | return frame_idx in self.detections 158 | 159 | 160 | class TrackSet(object): 161 | """ 162 | A set of multiple tracks. Each track is identified by a unique index (tag). 163 | 164 | Parameters 165 | ---------- 166 | tracks : Optional[Dict[int, Track]] 167 | Mapping from track identifier (also called track id or tag) to Track 168 | object. If None, an empty dictionary is created. 169 | 170 | Attributes 171 | ---------- 172 | tracks : Dict[int, Track] 173 | Mapping from track id (also called tag) to Track object. You may 174 | directly manipulate this attribute. 175 | 176 | """ 177 | 178 | def __init__(self, tracks=None): 179 | self.tracks = tracks if tracks is not None else {} 180 | 181 | def create_track(self, tag): 182 | """Create a new track. 183 | 184 | The newly created track is added to the track set. 185 | 186 | Parameters 187 | ---------- 188 | tag : int 189 | A unique object identifier. None of the existing tracks must share 190 | the same tag. 191 | 192 | Returns 193 | ------- 194 | Track 195 | The newly created track. 196 | 197 | """ 198 | assert tag not in self.tracks, "track with tag %d exists already" % tag 199 | self.tracks[tag] = Track() 200 | return self.tracks[tag] 201 | 202 | def first_frame_idx(self): 203 | """Get the index of the first frame at which any object is present. 204 | 205 | Returns 206 | ------- 207 | int 208 | Index of the first frame at which any object is present. 209 | 210 | """ 211 | return 0 if len(self.tracks) == 0 else min( 212 | track.first_frame_idx() for track in self.tracks.values()) 213 | 214 | def last_frame_idx(self): 215 | """Get the index of the last frame at which any object is present. 216 | 217 | Returns 218 | ------- 219 | int 220 | Index of the last frame at which any object is present. 221 | 222 | """ 223 | return 0 if len(self.tracks) == 0 else max( 224 | track.last_frame_idx() for track in self.tracks.values()) 225 | 226 | def num_frames(self): 227 | """Get the total number of frames at which at least one object is 228 | present, including occlusions. 229 | 230 | Returns 231 | ------- 232 | int 233 | self.last_frame_idx() - self.first_frame_idx() + 1 234 | 235 | """ 236 | return self.last_frame_idx() - self.first_frame_idx() + 1 237 | 238 | def frame_range(self): 239 | """ Get range of frames where at least one object is present. 240 | 241 | Returns 242 | ------- 243 | range 244 | Range from first frame at which any object appears to last frame at 245 | which any object is present:: 246 | 247 | >>> range(self.first_frame_idx(), self.last_frame_idx() + 1) 248 | 249 | """ 250 | start, end = self.first_frame_idx(), self.last_frame_idx() + 1 251 | return range(start, end) 252 | 253 | def collect_detections(self, frame_idx): 254 | """Collect all detections for a given frame index. 255 | 256 | Parameters 257 | ---------- 258 | frame_idx : int 259 | Index of the frame for which to collect detections. 260 | 261 | Returns 262 | ------- 263 | Dict[int, Detection] 264 | A mapping from track identifier (tag) to Detection. 265 | """ 266 | detections = {} 267 | for tag, track in self.tracks.items(): 268 | if frame_idx not in track.detections: 269 | continue 270 | detections[tag] = track.detections[frame_idx] 271 | return detections 272 | 273 | def collect_sensor_data(self, frame_idx): 274 | """Collect all sensor data for a given frame index. 275 | 276 | Parameters 277 | ---------- 278 | frame_idx : int 279 | Index of the frame for which to collect sensor data. 
280 | 281 | Returns 282 | ------- 283 | Dict[int, T] 284 | A mapping from track identifier (tag) to sensor data. 285 | """ 286 | sensor_data = {} 287 | for tag, track in self.tracks.items(): 288 | if frame_idx not in track.detections: 289 | continue 290 | sensor_data[tag] = track.detections[frame_idx].sensor_data 291 | return sensor_data 292 | 293 | 294 | def iterate_track_pairwise_with_time_offset(track, time_offset, for_each): 295 | """Generate all pairs of detections contained in a track. 296 | 297 | This function may be used to obtain positive examples for training the 298 | parameters of a pairwise matching cost function. 299 | 300 | Parameters 301 | ---------- 302 | track : Track 303 | A track to iterate over. 304 | time_offset : int 305 | The specified time offset between all pairs of detections. 306 | for_each : Callable[Detection, Detection] -> None 307 | A function that will be called for each pair. The first argument is 308 | the detection with smaller frame index, the second argument is the 309 | matching detection according to the time_offset. 310 | 311 | """ 312 | for frame_idx in track.frame_range(): 313 | ahead_idx = frame_idx + time_offset 314 | if frame_idx not in track.detections: 315 | continue 316 | if ahead_idx not in track.detections: 317 | continue 318 | for_each(track.detections[frame_idx], track.detections[ahead_idx]) 319 | 320 | 321 | def iterate_track_set_with_time_offset(track_set, time_offset, for_each): 322 | """Generate all pairs of detections contained in a track set. 323 | 324 | This function may be used to obtain positive and negative examples for 325 | training the parameters of a pairwise matching cost function. 326 | 327 | Parameters 328 | ---------- 329 | track_set : TrackSet 330 | The track set to iterate over. 331 | time_offset : int 332 | The specified time offset between all pairs of detections. 333 | for_each : Callable[int, Detection, int, Detection] -> None 334 | A function that will be called for each pair. The first two arguments 335 | are the track id and detection with smaller frame index, the third 336 | and fourth argument are a matching track id and detection. 337 | 338 | """ 339 | for frame_idx in track_set.frame_range(): 340 | ahead_idx = frame_idx + time_offset 341 | 342 | detections_now = track_set.collect_detections(frame_idx) 343 | detections_ahead = track_set.collect_detections(ahead_idx) 344 | 345 | for track_id_i, detection_i in detections_now.items(): 346 | for track_id_k, detection_k in detections_ahead.items(): 347 | for_each(track_id_i, detection_i, track_id_k, detection_k) 348 | 349 | 350 | def associate_detections(ground_truth, detections, min_bbox_overlap=0.5): 351 | """Associate detections to ground truth tracks. 352 | 353 | Parameters 354 | ---------- 355 | ground_truth : TrackSet 356 | The ground truth track set. Each detection must contain a region 357 | of interest in format (top left x, top left y, width, height)` in 358 | the sensor_data attribute. 359 | detections : Dict[int, List[Detection]] 360 | A dictionary that maps from frame index to list of detections. Each 361 | detection must contain an attribute `roi` that contains the region 362 | of interest in the same format as the ground_truth. 363 | min_bbox_overlap : float 364 | The minimum bounding box overlap for valid associations. A larger value 365 | increases the misalignment between detections and ground truth. 
366 | 367 | Returns 368 | ------- 369 | (TrackSet, Dict[int, List[Detection]]) 370 | The first element in the tuple is the set of detections associated with 371 | each ground truth track. The second element is a dictionary that maps 372 | from frame index to list of false alarms. 373 | 374 | """ 375 | track_set = TrackSet() 376 | false_alarms = {} 377 | 378 | for frame_idx in ground_truth.frame_range(): 379 | ground_truth_id_to_roi = ground_truth.collect_sensor_data(frame_idx) 380 | ground_truth_ids = list(ground_truth_id_to_roi.keys()) 381 | ground_truth_rois = np.asarray([ 382 | ground_truth_id_to_roi[k] for k in ground_truth_ids]) 383 | detection_rois = np.asarray([ 384 | d.roi for d in detections.get(frame_idx, [])]) 385 | 386 | if ground_truth_rois.shape[0] == 0: 387 | ground_truth_rois = ground_truth_rois.reshape(0, 4) 388 | if detection_rois.shape[0] == 0: 389 | detection_rois = detection_rois.reshape(0, 4) 390 | 391 | cost_matrix = pymotutils.linear_assignment.intersection_over_union_cost( 392 | ground_truth_rois, detection_rois) 393 | matched_indices, _, unmatched_detections = ( 394 | pymotutils.linear_assignment.min_cost_matching( 395 | cost_matrix, max_cost=1.0 - min_bbox_overlap)) 396 | 397 | false_alarms[frame_idx] = [ 398 | detections[frame_idx][i] for i in unmatched_detections] 399 | for ground_truth_idx, detection_idx in matched_indices: 400 | track_id = ground_truth_ids[ground_truth_idx] 401 | if track_id not in track_set.tracks: 402 | track_set.create_track(track_id) 403 | track_set.tracks[track_id].add( 404 | detections[frame_idx][detection_idx]) 405 | 406 | return track_set, false_alarms 407 | -------------------------------------------------------------------------------- /pymotutils/application/mono.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains code for tracking applications using a single 4 | camera. 5 | """ 6 | import numpy as np 7 | import cv2 8 | import pymotutils 9 | 10 | 11 | class RegionOfInterestDetection(pymotutils.Detection): 12 | """ 13 | A single-camera detection that contains the region of interest (ROI) and, 14 | optionally, a detector confidence score. 15 | 16 | Parameters 17 | ---------- 18 | frame_idx : int 19 | Index of the frame at which this detection occured. 20 | roi : ndarray 21 | The region of interest in which the object is contained as 4 22 | dimensional vector (x, y, w, h) where (x, y) is the top-left corner 23 | and (w, h) is the extent. 24 | confidence : NoneType | float 25 | Optional detector confidence score. If not None, it is appended to the 26 | sensor_data field of the detection. 27 | xyz : NoneType | ndarray 28 | Optional object locataion, e.g., in camera or world frame. 29 | feature : NoneType | ndarray 30 | Optional appearance descriptor. 31 | do_not_care : bool 32 | This flag indicates whether this detection should be included 33 | in evaluation. 34 | If True, missing this detection will be counted as false negative. If 35 | False, missing this detection will not have a negative impact on 36 | tracking performance. Therefore, this flag can be used to mark hard to 37 | detect objects (such as full occlusions) in ground truth. 38 | class_label : Optional[int] 39 | An optional integer-valued class label. 40 | class_name : Optional[str] 41 | An optional class name. 
42 | 43 | Attributes 44 | ---------- 45 | roi : ndarray 46 | The region of interest in which the object is contained as 4 47 | dimensional vector (x, y, w, h) where (x, y) is the top-left corner 48 | and (w, h) is the extent. 49 | confidence : NoneType | float 50 | Optinal detector confidence score 51 | xyz : NoneType | ndarray 52 | Optional object location, e.g., in camera or world frame. 53 | feature : NoneType | ndarray 54 | Optional appearance descriptor. 55 | 56 | """ 57 | 58 | def __init__( 59 | self, frame_idx, roi, confidence=None, xyz=None, feature=None, 60 | do_not_care=False, class_label=None, class_name=None): 61 | sensor_data = roi if confidence is None else np.r_[roi, confidence] 62 | super(RegionOfInterestDetection, self).__init__( 63 | frame_idx, sensor_data, do_not_care=do_not_care, roi=roi, 64 | confidence=confidence, xyz=xyz, feature=feature, 65 | class_label=class_label, class_name=class_name) 66 | 67 | 68 | class MonoVisualization(pymotutils.ImageVisualization): 69 | """ 70 | This class implements an image-based visualization of tracking output 71 | obtained from a single camera. 72 | 73 | Parameters 74 | ---------- 75 | update_ms : int 76 | Number of milliseconds to wait before processing the next time step. 77 | window_shape : Tuple[int, int] 78 | Shape of the image viewer in format (width, height). 79 | online_tracking_visualization : Optional[Callable[MonoVisualization, Dict[str, T], Tracker]] 80 | If not None, this function is called once at the end of each time step 81 | to visualize tracking output. 82 | 83 | The first argument is the visualization object (self), the second 84 | argument is frame_data dictionary of the current time step, and the 85 | third argument is the tracker that is processing the data. 86 | caption : str 87 | The window caption. 88 | 89 | Attributes 90 | ---------- 91 | detection_thickness : int 92 | The line thickness to be used for drawing detections. 93 | detection_color : Tuple[int, int, int] 94 | The color to be used for drawing detections. 95 | track_set_thickness : int 96 | The line thickness to be used for drawing track sets (e.g., 97 | ground truth or tracking output). 98 | trajectory_visualization_len : int 99 | A positive integer which evaluates to the maximum length of visualized 100 | trajectories. If 1, only the current time step is visualized. This is 101 | the default behavior. 
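    For example, a minimal sketch; the update interval and window shape are
    illustrative and `app` stands for a pymotutils.Application:

    >>> visualization = MonoVisualization(
    ...     update_ms=50, window_shape=(1024, 576), caption="Tracking")
    >>> visualization.trajectory_visualization_len = 25
    >>> app.play_hypotheses(visualization)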
102 | 103 | """ 104 | 105 | def __init__( 106 | self, update_ms, window_shape, online_tracking_visualization=None, 107 | caption="Figure 1"): 108 | super(MonoVisualization, self).__init__( 109 | update_ms, window_shape, caption) 110 | self.detection_thickness = 2 111 | self.detection_color = 0, 0, 255 112 | self.track_set_thickness = 2 113 | self.line_thickness = 10 114 | self.trajectory_visualization_len = 1 115 | self._frame_data = None 116 | 117 | if online_tracking_visualization is None: 118 | self._draw_online_tracking_results = ( 119 | lambda image_viewer, frame_data, tracker: None) 120 | else: 121 | self._draw_online_tracking_results = online_tracking_visualization 122 | 123 | def init_frame(self, frame_data): 124 | self._viewer.image = frame_data["bgr_image"].copy() 125 | self._frame_data = frame_data 126 | 127 | def finalize_frame(self): 128 | self._frame_data = None 129 | 130 | def draw_detections(self, detections): 131 | self._viewer.thickness = self.detection_thickness 132 | self._viewer.color = self.detection_color 133 | for i, detection in enumerate(detections): 134 | x, y, w, h = detection.roi 135 | confidence = detection.confidence 136 | label = "%0.02f" % confidence if confidence is not None else None 137 | self._viewer.color = 0, 0, 255 138 | self._viewer.rectangle(x, y, w, h, label) 139 | 140 | def draw_track_set(self, frame_idx, track_set): 141 | track_set_frame = track_set.collect_sensor_data(frame_idx) 142 | 143 | self._viewer.thickness = self.track_set_thickness 144 | if self.trajectory_visualization_len > 1: 145 | for tag in track_set_frame.keys(): 146 | self._viewer.color = pymotutils.create_unique_color_uchar(tag) 147 | track = track_set.tracks[tag] 148 | points = [] 149 | 150 | first_frame_idx = max( 151 | track.first_frame_idx(), 152 | frame_idx - self.trajectory_visualization_len) 153 | for this_frame_idx in range(first_frame_idx, frame_idx): 154 | if this_frame_idx not in track.detections: 155 | continue 156 | x, y, w, h = ( 157 | track.detections[this_frame_idx].sensor_data[:4]) 158 | x, y = int(x + w / 2), int(y + h) 159 | points.append((x, y)) 160 | self._viewer.circle(x, y, 1) 161 | if len(points) > 0: 162 | points = np.asarray(points) 163 | self._viewer.thickness = self.line_thickness 164 | self._viewer.polyline(np.asarray(points), alpha=0.5) 165 | self._viewer.thickness = self.track_set_thickness 166 | for tag, (x, y, w, h) in track_set_frame.items(): 167 | self._viewer.color = pymotutils.create_unique_color_uchar(tag) 168 | self._viewer.rectangle(x, y, w, h, label=str(tag)) 169 | 170 | def draw_online_tracking_output(self, tracker): 171 | self._draw_online_tracking_results( 172 | self._viewer, self._frame_data, tracker) 173 | 174 | 175 | def compute_features(detections, image_filenames, feature_extractor): 176 | """Utility function to pre-compute features. 177 | 178 | Parameters 179 | ---------- 180 | detections : Dict[int, List[RegionOfInterestDetection]] 181 | A dictionary that maps from frame index to list of detections. 182 | image_filenames : Dict[int, str] 183 | A dictionary that maps from frame index to image filename. The keys of 184 | the provided detections and image_filenames must match. 185 | feature_extractor: Callable[ndarray, ndarray] -> ndarray 186 | The feature extractor takes as input a color image and an Nx4 187 | dimensional matrix of bounding boxes in format (x, y, w, h) and returns 188 | an NxM dimensional matrix of N associated feature vectors. 
189 | 190 | """ 191 | 192 | frame_indices = sorted(list(detections.keys())) 193 | for frame_idx in frame_indices: 194 | bgr_image = cv2.imread(image_filenames[frame_idx], cv2.IMREAD_COLOR) 195 | assert bgr_image is not None, "Failed to load image" 196 | 197 | rois = np.asarray([d.roi for d in detections[frame_idx]]) 198 | features = feature_extractor(bgr_image, rois) 199 | for i, feature in enumerate(features): 200 | setattr(detections[frame_idx][i], "feature", feature) 201 | 202 | 203 | def extract_image_patches(detections, image_filenames, patch_shape): 204 | """Utility function to extract image patches of each detetions bounding box. 205 | 206 | On exit, each detection in `detections` has an attribute `image` that 207 | contains the image patch of shape `patch_shape` that shows the corresponding 208 | to the bounding box detection. 209 | 210 | Parameters 211 | ---------- 212 | detections : Dict[int, List[RegionOfInterestDetection]] 213 | A dictionary that maps from frame index to list of detections. 214 | image_filenames : Dict[int, str] 215 | A dictionary that maps from frame index to image filename. The keys of 216 | the provided detections and image_filenames must match. 217 | patch_shape : (int, int) 218 | Image patch shape (width, height). All bounding boxes are reshaped to 219 | this shape. 220 | 221 | """ 222 | 223 | def extract_image_patch(image, bbox): 224 | bbox = np.array(bbox) 225 | if patch_shape is not None: 226 | # correct aspect ratio to patch shape 227 | target_aspect = float(patch_shape[1]) / patch_shape[0] 228 | new_width = target_aspect * bbox[3] 229 | bbox[0] -= (new_width - bbox[2]) / 2 230 | bbox[2] = new_width 231 | 232 | # convert to top left, bottom right 233 | bbox[2:] += bbox[:2] 234 | bbox = bbox.astype(np.int) 235 | 236 | # clip at image boundaries 237 | bbox[:2] = np.maximum(0, bbox[:2]) 238 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 239 | if np.any(bbox[:2] >= bbox[2:]): 240 | return None 241 | sx, sy, ex, ey = bbox 242 | image = image[sy:ey, sx:ex] 243 | image = cv2.resize(image, patch_shape[::-1]) 244 | return image 245 | 246 | frame_indices = sorted(list(detections.keys())) 247 | for frame_idx in frame_indices: 248 | bgr_image = cv2.imread(image_filenames[frame_idx], cv2.IMREAD_COLOR) 249 | assert bgr_image is not None, "Failed to load image" 250 | 251 | rois = np.asarray([d.roi for d in detections[frame_idx]]) 252 | for i, detection in enumerate(detections[frame_idx]): 253 | setattr( 254 | detection, "image", extract_image_patch(bgr_image, rois[i])) 255 | -------------------------------------------------------------------------------- /pymotutils/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/detrac/__init__.py: -------------------------------------------------------------------------------- 1 | from .detrac_devkit import * 2 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/detrac/detrac_devkit.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | 
import numpy as np 4 | import cv2 5 | 6 | import pymotutils 7 | 8 | CAMERA_UPDATE_IN_MS = 33.3 # 30 FPS 9 | CAMERA_IMAGE_SHAPE = (960, 540) 10 | 11 | 12 | class DataSource(pymotutils.DataSource): 13 | """ 14 | A data source that provides access to one sequence out of the UA-DETRAC 15 | dataset. 16 | 17 | Parameters 18 | ---------- 19 | bgr_filenames : Dict[int, str] 20 | A dictionary that maps from frame index to image filename. 21 | detections : Dict[int, List[pymotutils.RegionOfInterestDetection]] 22 | A dictionary that maps from frame index to list of detections. Each 23 | detection contains the bounding box. 24 | ground_truth : Optional[pymotutils.TrackSet] 25 | The set of ground-truth tracks. 26 | 27 | Attributes 28 | ---------- 29 | bgr_filenames : Dict[int, str] 30 | A dictionary that maps from frame index to image filename. 31 | detections : Dict[int, List[pymotutils.RegionOfInterestDetection]] 32 | A dictionary that maps from frame index to list of detections. Each 33 | detection contains the bounding box. 34 | ground_truth : NoneType | pymotutils.TrackSet 35 | The set of ground-truth tracks, if available. 36 | 37 | """ 38 | 39 | def __init__(self, bgr_filenames, detections, ground_truth=None): 40 | self.bgr_filenames = bgr_filenames 41 | self.detections = detections 42 | self.ground_truth = ground_truth 43 | 44 | def apply_nonmaxima_suppression(self, max_bbox_overlap): 45 | """Apply non-maxima suppression. 46 | 47 | Parameters 48 | ---------- 49 | max_bbox_overlap : float 50 | ROIs that overlap more than this value are suppressed. 51 | 52 | Returns 53 | ------- 54 | 55 | """ 56 | for frame_idx, detections in self.detections.items(): 57 | if len(detections) == 0: 58 | continue 59 | boxes = np.asarray([d.roi for d in detections]) 60 | scores = np.asarray([d.confidence for d in detections]) 61 | indices = pymotutils.preprocessing.non_max_suppression( 62 | boxes, max_bbox_overlap, scores) 63 | self.detections[frame_idx] = [detections[i] for i in indices] 64 | 65 | def first_frame_idx(self): 66 | return min(self.bgr_filenames.keys()) 67 | 68 | def last_frame_idx(self): 69 | return max(self.bgr_filenames.keys()) 70 | 71 | @property 72 | def update_ms(self): 73 | return 25 74 | 75 | def read_frame_data(self, frame_idx): 76 | bgr_image = cv2.imread(self.bgr_filenames[frame_idx], cv2.IMREAD_COLOR) 77 | frame_data = { 78 | "bgr_image": bgr_image, 79 | "detections": self.detections.get(frame_idx, []), 80 | "ground_truth": self.ground_truth, 81 | "timestamp": float(frame_idx)} 82 | return frame_data 83 | 84 | def peek_image_shape(self): 85 | """Get the image shape for this sequence in format (height, width). """ 86 | image = cv2.imread(next(iter(self.bgr_filenames.values()))) 87 | return image.shape[:2] 88 | 89 | 90 | class Devkit(object): 91 | """ 92 | A development kit for the UA-DETRAC dataset [1]_. To use this development 93 | kit download the dataset from [1]_. Since the dataset comes in several zip 94 | files without structure, you can store the extracted files whereever you 95 | want, but you have to specify the `image_dir`, `detection_dir` and 96 | `xml_gt_dir`. You can use either the train or test sequences. 97 | 98 | [1]_ http://detrac-db.rit.albany.edu/ 99 | 100 | Parameters 101 | ---------- 102 | image_dir : str 103 | Path to the directory containing the sequences. 104 | detection_dir : str 105 | Path to the directory containing the detections. (Eg. R-CNN, DPM, etc.) 106 | xml_gt_dir : str 107 | Path to the directory containing the xml annotations. 
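    For example, where the directory names and the sequence name are
    placeholders for wherever the extracted archives are stored:

    >>> devkit = Devkit(
    ...     image_dir="path/to/DETRAC-train-images",
    ...     detection_dir="path/to/DETRAC-Train-Detections/R-CNN",
    ...     xml_gt_dir="path/to/DETRAC-Train-Annotations-XML")
    >>> data_source = devkit.create_data_source("MVI_20011")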
108 | 109 | """ 110 | 111 | def __init__(self, image_dir, detection_dir, xml_gt_dir): 112 | 113 | self.image_dir = image_dir 114 | self.detection_dir = detection_dir 115 | self.xml_gt_dir = xml_gt_dir 116 | 117 | def create_data_source(self, sequence, min_confidence=None): 118 | """ 119 | Create data source for a given sequence. 120 | 121 | Parameters 122 | ---------- 123 | sequence : str 124 | Name of the sequence directory inside the `image_dir` 125 | min_confidence : Optional[float] 126 | A detector confidence threshold. All detections with confidence 127 | lower than this value are disregarded. 128 | 129 | Returns 130 | ------- 131 | DataSource 132 | Returns the data source of the given sequence. 133 | 134 | """ 135 | 136 | sequence_image_dir = os.path.join(self.image_dir, sequence) 137 | 138 | bgr_filenames = { 139 | int(os.path.splitext(f[3:])[0]): os.path.join( 140 | sequence_image_dir, f) 141 | for f in sorted(os.listdir(sequence_image_dir))} 142 | 143 | detection_dir_name = os.path.basename( 144 | os.path.normpath(self.detection_dir)) 145 | detection_file = os.path.join( 146 | self.detection_dir, 147 | sequence + "_Det_" + detection_dir_name + ".txt") 148 | detections = pymotutils.motchallenge_io.read_detections( 149 | detection_file, min_confidence) 150 | 151 | ground_truth_file = os.path.join(self.xml_gt_dir, sequence + '.xml') 152 | ground_truth = pymotutils.detrac_io.read_groundtruth(ground_truth_file) 153 | 154 | return DataSource(bgr_filenames, detections, ground_truth) 155 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/kitti/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from .kitti_devkit import * 3 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/kitti/kitti_devkit.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import numpy as np 4 | import cv2 5 | 6 | import pymotutils 7 | 8 | SEQUENCES_TRAININING = ["%04.d" % i for i in range(1, 21)] 9 | SEQUENCES_TESTING = ["%04.d" % i for i in range(1, 29)] 10 | 11 | OBJECT_CLASSES = [ 12 | "Car", "Van", "Truck", "Pedestrian", "Person_sitting", "Cyclist", "Tram", 13 | "Misc", "DontCare"] 14 | 15 | OBJECT_CLASSES_CARS = ["Car"] 16 | 17 | OBJECT_CLASSES_PEDESTRIANS = ["Pedestrian"] 18 | 19 | MIN_OBJECT_HEIGHT_IN_PIXELS = 25 20 | 21 | CAMERA_IMAGE_SHAPE = (1242, 375) 22 | CAMERA_UPDATE_IN_MS = 100 # 10 Hz 23 | 24 | GROUND_PLANE_NORMAL = np.array([0., 0., 1.]) 25 | GROUND_PLANE_DISTANCE = -0.93 26 | 27 | 28 | def convert_oxts_to_pose(oxts_list): 29 | # Converted code from KITTI devkit MATLAB script: 30 | # 31 | # Converts a list of oxts measurements into metric poses, starting at 32 | # (-1,0,0) meters, OXTS coordinates are defined as x = forward, y = right, 33 | # z = down (see OXTS RT2999 user manual) afterwards, pose[i] contains the 34 | # transformation which takes a 2D point in the i'th frame and projects it 35 | # into the oxts coordinates of the first frame. 
36 | def lat_to_scale(lat): 37 | return np.cos(lat * np.pi / 180.0) 38 | 39 | def lat_lon_to_mercator(lat, lon, scale): 40 | er = 6378137 41 | mx = scale * lon * np.pi * er / 180 42 | my = scale * er * np.log(np.tan((90 + lat) * np.pi / 360)) 43 | return mx, my 44 | 45 | scale = lat_to_scale(oxts_list[0][0]) 46 | 47 | # init pose 48 | pose_list = [] 49 | inv_transform_0 = None 50 | 51 | # for all oxts packets do 52 | for i, oxts in enumerate(oxts_list): 53 | # if there is no data => no pose 54 | if oxts is None: 55 | pose_list.append(None) 56 | continue 57 | 58 | # translation vector 59 | translation = np.zeros((3, )) 60 | translation[:2] = lat_lon_to_mercator(oxts[0], oxts[1], scale) 61 | translation[2] = oxts[2] 62 | 63 | # rotation matrix (OXTS RT3000 user manual, page 71/92) 64 | rx, ry, rz = oxts[3:6] # roll, pitch, heading 65 | 66 | # base => nav (level oxts => rotated oxts) 67 | rotation_x = np.array([[1, 0, 0], [0, np.cos(rx), -np.sin(rx)], [ 68 | 0, np.sin(rx), np.cos(rx)]]) 69 | 70 | # base => nav (level oxts => rotated oxts) 71 | rotation_y = np.array([[np.cos(ry), 0, np.sin(ry)], [0, 1, 0], [ 72 | -np.sin(ry), 0, np.cos(ry)]]) 73 | 74 | # base => nav (level oxts => rotated oxts) 75 | rotation_z = np.array([[np.cos(rz), -np.sin(rz), 0], [ 76 | np.sin(rz), np.cos(rz), 0], [0, 0, 1]]) 77 | 78 | # normalize translation and rotation (start at 0/0/0) 79 | transformation = np.eye(4, 4) 80 | transformation[:3, :3] = np.linalg.multi_dot( 81 | (rotation_z, rotation_y, rotation_x)) 82 | transformation[:3, 3] = translation 83 | if inv_transform_0 is None: 84 | inv_transform_0 = np.linalg.inv(transformation) 85 | 86 | # add pose 87 | pose_list.append(np.dot(inv_transform_0, transformation)) 88 | 89 | return pose_list 90 | 91 | 92 | def read_odometry(filename): 93 | oxts_list = np.loadtxt(filename) 94 | return convert_oxts_to_pose(oxts_list) 95 | 96 | 97 | def read_calibration(filename): 98 | with open(filename, "r") as f: 99 | lines = f.read().splitlines() 100 | 101 | data_dict = {} 102 | for line in lines: 103 | words = line.strip().split(' ') 104 | data_dict[words[0].strip(':')] = np.fromstring( 105 | ";".join(words[1:]), sep=';') 106 | 107 | velodyne_to_camera = np.eye(4) 108 | velodyne_to_camera[:3, :4] = data_dict["Tr_velo_cam"].reshape(3, 4) 109 | 110 | imu_to_velodyne = np.eye(4) 111 | imu_to_velodyne[:3, :4] = data_dict["Tr_imu_velo"].reshape(3, 4) 112 | imu_to_camera = np.dot(velodyne_to_camera, imu_to_velodyne) 113 | 114 | camera_to_rectified = np.eye(4) 115 | camera_to_rectified[:3, :3] = data_dict["R_rect"].reshape(3, 3) 116 | imu_to_rectified = np.dot(camera_to_rectified, imu_to_camera) 117 | 118 | projection_matrix = data_dict["P2"].reshape(3, 4) 119 | return projection_matrix, imu_to_rectified 120 | 121 | 122 | def read_ground_truth( 123 | filename, object_classes=None, min_height=MIN_OBJECT_HEIGHT_IN_PIXELS): 124 | """ 125 | 126 | File format: 127 | 128 | #Values Name Description 129 | ---------------------------------------------------------------------------- 130 | 1 frame Frame within the sequence where the object appearers 131 | 1 track id Unique tracking id of this object within this sequence 132 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 133 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 134 | 'Misc' or 'DontCare' 135 | 1 truncated Float from 0 (non-truncated) to 1 (truncated), where 136 | truncated refers to the object leaving image 137 | boundaries. 
138 | Truncation 2 indicates an ignored object (in particular 139 | in the beginning or end of a track) introduced by 140 | manual labeling. 141 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 142 | 0 = fully visible, 1 = partly occluded 143 | 2 = largely occluded, 3 = unknown 144 | 1 alpha Observation angle of object, ranging [-pi..pi] 145 | 4 bbox 2D bounding box of object in the image (0-based index): 146 | contains left, top, right, bottom pixel coordinates 147 | 3 dimensions 3D object dimensions: height, width, length (in meters) 148 | 3 location 3D object location x,y,z in camera coordinates 149 | (in meters) 150 | 1 rotation_y Rotation ry around Y-axis in camera coordinates 151 | [-pi..pi] 152 | 153 | """ 154 | with open(filename, "r") as f: 155 | lines = f.read().splitlines() 156 | 157 | track_set = pymotutils.TrackSet() 158 | for line in lines: 159 | words = line.strip().split(' ') 160 | assert len(words) == 17, "Invalid number of elements in line." 161 | object_class = words[2] 162 | if object_class not in object_classes: 163 | continue 164 | frame_idx, track_id = int(words[0]), int(words[1]) 165 | roi = np.asarray([float(x) for x in words[6:10]]) 166 | roi[2:] -= roi[:2] - 1 # Convert to x, y, w, h 167 | if roi[3] < min_height: 168 | continue 169 | 170 | if track_id not in track_set.tracks: 171 | track_set.create_track(track_id) 172 | track_set.tracks[track_id].add(pymotutils.Detection(frame_idx, roi)) 173 | 174 | return track_set 175 | 176 | 177 | def read_detections( 178 | filename, object_classes=None, min_height=MIN_OBJECT_HEIGHT_IN_PIXELS, 179 | min_confidence=-np.inf): 180 | """ 181 | 182 | File format: 183 | 184 | #Values Name Description 185 | ---------------------------------------------------------------------------- 186 | 1 frame Frame within the sequence where the object appearers 187 | 1 track id IGNORED 188 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 189 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 190 | 'Misc' or 'DontCare' 191 | 1 truncated IGNORED 192 | 1 occluded IGNORED 193 | 1 alpha IGNORED 194 | 4 bbox 2D bounding box of object in the image (0-based index): 195 | contains left, top, right, bottom pixel coordinates 196 | 3 dimensions IGNORED 197 | 3 location IGNORED 198 | 1 rotation_y IGNORED 199 | 1 score Float, indicating confidence in detection, higher is 200 | better. 201 | 202 | """ 203 | with open(filename, "r") as f: 204 | lines = f.read().splitlines() 205 | 206 | detections = {} 207 | for line in lines: 208 | words = line.strip().split(' ') 209 | assert len(words) == 18, "Invalid number of elements in line." 
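        # Detection files use the same space-separated layout as the label
        # files but append a detector score, giving 18 fields per line
        # (read_ground_truth above asserts 17 fields).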
210 | object_class = words[2] 211 | if object_class not in object_classes: 212 | continue 213 | frame_idx = int(words[0]) 214 | roi = np.asarray([float(x) for x in words[6:10]]) 215 | roi[2:] -= roi[:2] - 1 # Convert to x, y, w, h 216 | if roi[3] < min_height: 217 | continue 218 | confidence = float(words[17]) 219 | if confidence < min_confidence: 220 | continue 221 | detections.setdefault(frame_idx, []).append( 222 | pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence)) 223 | 224 | return detections 225 | 226 | 227 | def write_hypotheses(filename, track_set, object_class): 228 | lines = [] 229 | for frame_idx in track_set.frame_range(): 230 | track_id_to_bbox = track_set.collect_sensor_data(frame_idx) 231 | for track_id, bbox in track_id_to_bbox.items(): 232 | line = ( 233 | "%d %d %s -1 -1 -1 %0.2f %0.2f %0.2f %0.2f " 234 | "-1 -1 -1 -1 -1 -1 -1" % ( 235 | frame_idx, track_id, object_class, bbox[0], bbox[1], 236 | bbox[0] + bbox[2], bbox[1] + bbox[3]) + os.linesep) 237 | lines.append(line) 238 | 239 | with open(filename, "w") as f: 240 | f.writelines(lines) 241 | 242 | 243 | class DataSource(pymotutils.DataSource): 244 | 245 | def __init__( 246 | self, projection_matrix, bgr_filenames, ground_truth, detections, 247 | sensor_poses, sequence_name, object_classes): 248 | self.projection_matrix = projection_matrix 249 | self.bgr_filenames = bgr_filenames 250 | self.ground_truth = ground_truth 251 | self.detections = detections 252 | self.sensor_poses = sensor_poses 253 | self.sequence_name = sequence_name 254 | self.object_classes = object_classes 255 | 256 | def apply_nonmaxima_suppression(self, max_bbox_overlap): 257 | for frame_idx, detections in self.detections.items(): 258 | if len(detections) == 0: 259 | continue 260 | boxes = np.asarray([d.roi for d in detections]) 261 | scores = np.asarray([d.confidence for d in detections]) 262 | indices = pymotutils.preprocessing.non_max_suppression( 263 | boxes, max_bbox_overlap, scores) 264 | self.detections[frame_idx] = [detections[i] for i in indices] 265 | 266 | def first_frame_idx(self): 267 | return min(self.bgr_filenames.keys()) 268 | 269 | def last_frame_idx(self): 270 | return max(self.bgr_filenames.keys()) 271 | 272 | @property 273 | def update_ms(self): 274 | return CAMERA_UPDATE_IN_MS 275 | 276 | def read_frame_data(self, frame_idx): 277 | bgr_image = cv2.imread(self.bgr_filenames[frame_idx], cv2.IMREAD_COLOR) 278 | frame_data = { 279 | "bgr_image": bgr_image, 280 | "detections": self.detections.get(frame_idx, []), 281 | "ground_truth": self.ground_truth, 282 | "timestamp": float(frame_idx) * self.update_ms / 1000., 283 | "sensor_pose": self.sensor_poses[frame_idx], 284 | "projection_matrix": self.projection_matrix} 285 | return frame_data 286 | 287 | 288 | class Devkit(object): 289 | 290 | def __init__(self, dataset_dir, detection_dir=None): 291 | # dataset_dir should point to either 'training' or 'testing' dir 292 | # If detection_dir is not None, takes pickled detections from that 293 | # directory instead of loading raw the detections from KITTI datset. 
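        # The pickle file is expected to hold a dict mapping frame index to a
        # list of detection objects exposing `confidence` and `roi` attributes;
        # see create_data_source below. Typical usage (paths and sequence name
        # are placeholders):
        #   devkit = Devkit("/path/to/kitti/training")
        #   data_source = devkit.create_data_source("0001", OBJECT_CLASSES_CARS)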
294 | self.dataset_dir = dataset_dir 295 | self.detection_dir = detection_dir 296 | 297 | def create_data_source( 298 | self, sequence, object_classes, 299 | min_height=MIN_OBJECT_HEIGHT_IN_PIXELS, min_confidence=-np.inf): 300 | image_dir = os.path.join(self.dataset_dir, "image_02", sequence) 301 | bgr_filenames = { 302 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 303 | for f in sorted(os.listdir(image_dir))} 304 | 305 | if self.detection_dir is not None: 306 | detections_filename = os.path.join( 307 | self.detection_dir, "%s.pkl" % sequence) 308 | with open(detections_filename, "rb") as f: 309 | import pickle 310 | unfiltered_detections = pickle.load(f) 311 | 312 | detections = {} 313 | for frame_idx, dets in unfiltered_detections.items(): 314 | detections[frame_idx] = [ 315 | d for d in dets if d.confidence >= min_confidence and 316 | d.roi[3] >= min_height] 317 | else: 318 | detections_filename = os.path.join( 319 | self.dataset_dir, "det_02/%s.txt" % sequence) 320 | detections = read_detections( 321 | detections_filename, object_classes, min_height, 322 | min_confidence) 323 | 324 | ground_truth_filename = os.path.join( 325 | self.dataset_dir, "label_02/%s.txt" % sequence) 326 | if os.path.exists(ground_truth_filename): 327 | ground_truth = read_ground_truth( 328 | ground_truth_filename, object_classes) 329 | else: 330 | ground_truth = None 331 | 332 | oxts_filename = os.path.join( 333 | self.dataset_dir, "oxts/%s.txt" % sequence) 334 | imu_to_world_list = read_odometry(oxts_filename) 335 | 336 | calibration_filename = os.path.join( 337 | self.dataset_dir, "calib/%s.txt" % sequence) 338 | projection_matrix, imu_to_rectified = read_calibration( 339 | calibration_filename) 340 | 341 | rectified_to_imu = np.linalg.inv(imu_to_rectified) 342 | frame_idx_to_sensor_pose = { 343 | i: np.dot(imu_to_world, rectified_to_imu)[:3, :4] 344 | for i, imu_to_world in enumerate(imu_to_world_list)} 345 | 346 | return DataSource( 347 | projection_matrix, bgr_filenames, ground_truth, detections, 348 | frame_idx_to_sensor_pose, sequence, object_classes) 349 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/motchallenge/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from .motchallenge_devkit import * -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/motchallenge/motchallenge_devkit.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import numpy as np 4 | import cv2 5 | 6 | import pymotutils 7 | 8 | 9 | class DataSource(pymotutils.DataSource): 10 | """ 11 | A data source that provides access to one sequence out of the MOTChallenge 12 | dataset. 13 | 14 | Parameters 15 | ---------- 16 | bgr_filenames : Dict[int, str] 17 | A dictionary that maps from frame index to image filename. 18 | detections : Dict[int, List[pymotutils.RegionOfInterestDetection]] 19 | A dictionary that maps from frame index to list of detections. Each 20 | detection contains the bounding box and, if provided, the 3D object 21 | coordinates (via attribute `xyz`). 22 | ground_truth : Optional[TrackSet] 23 | The set of ground-truth tracks. 24 | 25 | Attributes 26 | ---------- 27 | bgr_filenames : Dict[int, str] 28 | A dictionary that maps from frame index to image filename. 
29 | detections : Dict[int, List[pymotutils.RegionOfInterestDetection]] 30 | A dictionary that maps from frame index to list of detections. Each 31 | detection contains the bounding box and, if provided, the 3D object 32 | coordinates (via attribute `xyz`). 33 | ground_truth : NoneType | TrackSet 34 | The set of ground-truth tracks, if available. 35 | 36 | """ 37 | 38 | def __init__(self, bgr_filenames, detections, ground_truth=None): 39 | self.bgr_filenames = bgr_filenames 40 | self.detections = detections 41 | self.ground_truth = ground_truth 42 | 43 | def apply_nonmaxima_suppression(self, max_bbox_overlap): 44 | """Apply non-maxima suppression. 45 | 46 | Parameters 47 | ---------- 48 | max_bbox_overlap : float 49 | ROIs that overlap more than this value are suppressed. 50 | 51 | Returns 52 | ------- 53 | 54 | """ 55 | for frame_idx, detections in self.detections.items(): 56 | if len(detections) == 0: 57 | continue 58 | boxes = np.asarray([d.roi for d in detections]) 59 | scores = np.asarray([d.confidence for d in detections]) 60 | indices = pymotutils.preprocessing.non_max_suppression( 61 | boxes, max_bbox_overlap, scores) 62 | self.detections[frame_idx] = [detections[i] for i in indices] 63 | 64 | def first_frame_idx(self): 65 | return min(self.bgr_filenames.keys()) 66 | 67 | def last_frame_idx(self): 68 | return max(self.bgr_filenames.keys()) 69 | 70 | @property 71 | def update_ms(self): 72 | return 25 # TODO(nwojke): Peek correct frame rate from seqinfo.ini file 73 | 74 | def read_frame_data(self, frame_idx): 75 | bgr_image = cv2.imread(self.bgr_filenames[frame_idx], cv2.IMREAD_COLOR) 76 | frame_data = { 77 | "bgr_image": bgr_image, 78 | "detections": self.detections.get(frame_idx, []), 79 | "ground_truth": self.ground_truth, 80 | "timestamp": float(frame_idx)} 81 | return frame_data 82 | 83 | def peek_image_shape(self): 84 | """Get the image shape for this sequence in format (height, width). """ 85 | image = cv2.imread(next(iter(self.bgr_filenames.values()))) 86 | return image.shape[:2] 87 | 88 | 89 | class Devkit(object): 90 | """ 91 | A development kit for the MOTChallenge dataset [1]_. To use this development 92 | kit, download the dataset from [1]_ and set the `dataset_dir` to either 93 | the train or test directory. Then, create a DataSource for one of the 94 | sequences contained in this directory. 95 | 96 | [1]_ http://www.motchallenge.net 97 | 98 | Parameters 99 | ---------- 100 | dataset_dir : str 101 | Path to the MOTChallenge train or test directory. 102 | detection_dir : Optional[str] 103 | Optional path to a directory containing custom detections. The expected 104 | filename is `detection_dir/[sequence_name].txt`. Detections must be 105 | stored in the original MOTChallenge format. 106 | 107 | Attributes 108 | ---------- 109 | dataset_dir : str 110 | Path to the MOTChallenge train/test directory. 111 | detection_dir : NoneType | str 112 | If not None, a path to a directory containing custom detections in 113 | MOTChallenge format. 114 | 115 | """ 116 | 117 | def __init__(self, dataset_dir, detection_dir=None): 118 | self.dataset_dir = dataset_dir 119 | self.detection_dir = detection_dir 120 | 121 | def create_data_source(self, sequence, min_confidence=None): 122 | """Create data source for a given sequence. 123 | 124 | Parameters 125 | ---------- 126 | sequence : str 127 | Name of the sequence directory inside the `dataset_dir`. 128 | min_confidence : Optional[float] 129 | A detector confidence threshold. 
All detections with confidence 130 | lower than this value are disregarded. 131 | 132 | Returns 133 | ------- 134 | DataSource 135 | Returns the data source of the given sequence. 136 | 137 | """ 138 | sequence_dir = os.path.join(self.dataset_dir, sequence) 139 | image_dir = os.path.join(sequence_dir, "img1") 140 | bgr_filenames = { 141 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 142 | for f in sorted(os.listdir(image_dir))} 143 | 144 | detection_file = ( 145 | os.path.join(sequence_dir, "det", "det.txt") 146 | if self.detection_dir is None else os.path.join( 147 | self.detection_dir, "%s.txt" % sequence)) 148 | detections = pymotutils.motchallenge_io.read_detections( 149 | detection_file, min_confidence) 150 | 151 | ground_truth_file = os.path.join(sequence_dir, "gt", "gt.txt") 152 | ground_truth = pymotutils.motchallenge_io.read_groundtruth( 153 | ground_truth_file, sensor_data_is_3d=False) # Evaluation always 2D 154 | 155 | # TODO(nwojke): MOT16 and newer have a seqinfo.ini file that contains 156 | # information on the frame rate and image size. 157 | return DataSource(bgr_filenames, detections, ground_truth) 158 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/pets2009/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from .pets2009_devkit import * 3 | -------------------------------------------------------------------------------- /pymotutils/contrib/datasets/pets2009/pets2009_devkit.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import urllib.request 4 | import hashlib 5 | from functools import partial 6 | import tarfile 7 | import glob 8 | import shutil 9 | from xml.dom import minidom 10 | import subprocess 11 | 12 | import numpy as np 13 | import cv2 14 | 15 | import pymotutils 16 | 17 | TRAIN_SEQUENCES = ["S1L1-1", "S1L2-2", "S2L1"] 18 | 19 | TEST_SEQUENCES = ["S1L1-2", "S1L2-1", "S2L2", "S2L3"] 20 | 21 | CAMERA_IMAGE_SHAPE = (768, 576) 22 | CAMERA_UPDATE_IN_MS = 142.85 # approx. 7 Hz 23 | 24 | GROUND_PLANE_NORMAL = np.array([0., 0., 1.]) 25 | GROUND_PLANE_DISTANCE = 0. 26 | 27 | CROPPED_TRACKING_AREA_MIN = np.array([-14.0696, -14.274, -np.inf]) 28 | CROPPED_TRACKING_AREA_MAX = np.array([4.9813, 1.7335, np.inf]) 29 | 30 | 31 | def euler_to_mat(rx, ry, rz, tx, ty, tz, scale=1. / 1000): 32 | # Default extrinsics are in mm. Use scale 1./1000 for world coordinate frame 33 | # in meters. 34 | tx, ty, tz = tx * scale, ty * scale, tz * scale 35 | 36 | # NOTE: this is accoring to the Tsai Camera Calibration Toolbox 37 | # http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/DIAS1/ 38 | cx, cy, cz = np.cos(rx), np.cos(ry), np.cos(rz) 39 | sx, sy, sz = np.sin(rx), np.sin(ry), np.sin(rz) 40 | 41 | pose = np.eye(4) 42 | pose[0, 0] = cy * cz 43 | pose[0, 1] = cz * sx * sy - cx * sz 44 | pose[0, 2] = sx * sz + cx * cz * sy 45 | pose[1, 0] = cy * sz 46 | pose[1, 1] = sx * sy * sz + cx * cz 47 | pose[1, 2] = cx * sy * sz - cz * sx 48 | pose[2, 0] = -sy 49 | pose[2, 1] = cy * sx 50 | pose[2, 2] = cx * cy 51 | 52 | pose[0, 3] = tx 53 | pose[1, 3] = ty 54 | pose[2, 3] = tz 55 | return pose 56 | 57 | 58 | def create_projection_matrix(filename, extrinsic_scale=1. / 1000): 59 | # Default extrinsics are in mm. Use scale 1./1000 for world coordinate frame 60 | # in meters. 
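    # The Tsai-style calibration XML provides <Geometry>, <Intrinsic> and
    # <Extrinsic> nodes: the intrinsics (focal, sx, cx, cy, dpx, dpy) form the
    # camera matrix, which is composed with the world-to-camera pose built
    # from the extrinsics to give the final 3x4 projection matrix.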
61 | xmldoc = minidom.parse(filename) 62 | geometry = xmldoc.getElementsByTagName("Geometry")[0] 63 | intrinsic = xmldoc.getElementsByTagName("Intrinsic")[0] 64 | extrinsic = xmldoc.getElementsByTagName("Extrinsic")[0] 65 | 66 | def g(attr): 67 | return float(geometry.attributes[attr].value) 68 | 69 | def i(attr): 70 | return float(intrinsic.attributes[attr].value) 71 | 72 | def e(attr): 73 | return float(extrinsic.attributes[attr].value) 74 | 75 | world_to_camera = euler_to_mat( 76 | e("rx"), e("ry"), e("rz"), e("tx"), e("ty"), e("tz"), extrinsic_scale) 77 | 78 | projection_matrix = np.eye(3, 4) 79 | projection_matrix[0, 0] = i("sx") * i("focal") / g("dpx") 80 | projection_matrix[1, 1] = i("focal") / g("dpy") 81 | projection_matrix[0, 2] = i("cx") 82 | projection_matrix[1, 2] = i("cy") 83 | return np.dot(projection_matrix, world_to_camera) 84 | 85 | 86 | def intersect_with_ground_plane( 87 | inv_projection_matrix, ground_plane_normal, ground_plane_distance, 88 | points): 89 | """Find intersection of a ray through an image pixel with the ground plane. 90 | 91 | Plane parameters: 92 | .. math:: ground_plane_normal.T \cdot x - ground_plane_distance = 0 93 | 94 | Parameters 95 | ---------- 96 | inv_projection_matrix : ndarray 97 | The 4x4 inverse of the projection matrix. 98 | ground_plane_normal : ndarray 99 | The normal vector of the plane. 100 | ground_plane_distance : float 101 | Distance of the plane to origin. 102 | points : ndarray 103 | The Nx2 array of pixel coordinates. 104 | 105 | Returns 106 | ------- 107 | (ndarray, ndarray) 108 | This method returns the Nx3 array of intersections as well as 109 | an array of booleans that is True if the intersection point is 110 | is valid and False if ray and plane are (almost) parallel. 111 | 112 | """ 113 | # 1) Create ray that passes through pixels, transform to world frame 114 | rays = np.empty((points.shape[0], 4)) 115 | rays[:, :2], rays[:, 2], rays[:, 3] = points, 1., 0. 116 | rays = np.dot(rays, inv_projection_matrix.T) 117 | rays /= np.atleast_2d(np.sqrt(np.sum(rays[:, :3]**2, axis=1))).T 118 | rays = rays[:, :3] 119 | 120 | # 2) check for intersection using dot product between rays 121 | # and plane normal 122 | min_dot = np.cos(89 * np.pi / 180.) 
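    # A ray is accepted only if the angle between it and the plane normal is
    # below 89 degrees; rays that are (almost) parallel to the ground plane,
    # i.e. |cos| < cos(89 deg), are flagged as invalid below.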
123 | dot_nv = np.sum(rays * ground_plane_normal, axis=1) 124 | isvalid = np.abs(dot_nv) >= min_dot 125 | 126 | # 3) compute point of intersection 127 | p = inv_projection_matrix[:3, 3] 128 | lamda = (ground_plane_distance - np.dot(p, ground_plane_normal)) / dot_nv 129 | intersection = p + np.atleast_2d(lamda).T * rays 130 | return intersection, isvalid 131 | 132 | 133 | def read_cvml_detections( 134 | filename, projection_matrix, roi_scale_w=0.75, roi_scale_h=1.0): 135 | 136 | def fattr(node, name): 137 | return float(node.attributes[name].value) 138 | 139 | def rescale_roi(old_roi): 140 | x, y, w, h = old_roi 141 | new_w, new_h = roi_scale_w * w, roi_scale_h * h 142 | dw, dh = w - new_w, h - new_h 143 | x += dw / 2 144 | y += dh / 2 145 | return x, y, new_w, new_h 146 | 147 | wrapped_projection_matrix = np.eye(4) 148 | wrapped_projection_matrix[:3, :4] = projection_matrix 149 | inv_projection_matrix = np.linalg.inv(wrapped_projection_matrix) 150 | 151 | xmldoc = minidom.parse(filename) 152 | detections = {} 153 | for frame in xmldoc.getElementsByTagName("frame"): 154 | frame_idx = int(frame.attributes["number"].value) 155 | detections[frame_idx] = [] 156 | for obj in frame.getElementsByTagName("object"): 157 | box = obj.getElementsByTagName("box")[0] 158 | xc, yc = fattr(box, "xc"), fattr(box, "yc") 159 | w, h = fattr(box, "w"), fattr(box, "h") 160 | roi = xc - w / 2., yc - h / 2., w, h 161 | roi = rescale_roi(roi) 162 | confidence = fattr(obj, "confidence") 163 | xyz, isvalid = intersect_with_ground_plane( 164 | inv_projection_matrix, GROUND_PLANE_NORMAL, 165 | GROUND_PLANE_DISTANCE, np.array([[xc, yc + h / 2.]])) 166 | assert isvalid[0], "Failed to compute ground plane projection" 167 | detections[frame_idx].append( 168 | pymotutils.RegionOfInterestDetection( 169 | frame_idx, np.asarray(roi), confidence, xyz[0])) 170 | return detections 171 | 172 | 173 | def read_cvml_groundtruth(filename, projection_matrix): 174 | 175 | def fattr(node, name): 176 | return float(node.attributes[name].value) 177 | 178 | wrapped_projection_matrix = np.eye(4) 179 | wrapped_projection_matrix[:3, :4] = projection_matrix 180 | inv_projection_matrix = np.linalg.inv(wrapped_projection_matrix) 181 | 182 | xmldoc = minidom.parse(filename) 183 | track_set = pymotutils.TrackSet() 184 | for frame in xmldoc.getElementsByTagName("frame"): 185 | frame_idx = int(frame.attributes["number"].value) 186 | for obj in frame.getElementsByTagName("object"): 187 | box = obj.getElementsByTagName("box")[0] 188 | xc, yc = fattr(box, "xc"), fattr(box, "yc") 189 | w, h = fattr(box, "w"), fattr(box, "h") 190 | roi = xc - w / 2., yc - h / 2., w, h 191 | xyz, isvalid = intersect_with_ground_plane( 192 | inv_projection_matrix, GROUND_PLANE_NORMAL, 193 | GROUND_PLANE_DISTANCE, np.array([[xc, yc + h / 2.]])) 194 | assert isvalid[0], "Failed to compute ground plane projection" 195 | 196 | track_id = int(obj.attributes["id"].value) 197 | if track_id not in track_set.tracks: 198 | track_set.create_track(track_id) 199 | track_set.tracks[track_id].add( 200 | pymotutils.RegionOfInterestDetection( 201 | frame_idx, roi, xyz=xyz[0])) 202 | return track_set 203 | 204 | 205 | def clip_track_set_at_tracking_area(track_set, xyz="sensor_data"): 206 | cropped_track_set = pymotutils.TrackSet() 207 | for tag, track in track_set.tracks.items(): 208 | detections = { 209 | i: d for i, d in track.detections.items() 210 | if np.all(getattr(d, xyz) >= CROPPED_TRACKING_AREA_MIN) and 211 | np.all(getattr(d, xyz) <= CROPPED_TRACKING_AREA_MAX)} 212 | if 
len(detections) == 0: 213 | continue 214 | cropped_track = cropped_track_set.create_track(tag) 215 | cropped_track.detections = detections 216 | return cropped_track_set 217 | 218 | 219 | class DataSource(pymotutils.DataSource): 220 | 221 | def __init__( 222 | self, projection_matrix, bgr_filenames, ground_truth, detections, 223 | sequence_name): 224 | self.projection_matrix = projection_matrix 225 | self.bgr_filenames = bgr_filenames 226 | self.ground_truth = ground_truth 227 | self.detections = detections 228 | self.sequence_name = sequence_name 229 | 230 | def apply_nonmaxima_suppression(self, max_bbox_overlap): 231 | for frame_idx, detections in self.detections.items(): 232 | if len(detections) == 0: 233 | continue 234 | boxes = np.asarray([d.roi for d in detections]) 235 | scores = np.asarray([d.confidence for d in detections]) 236 | indices = pymotutils.preprocessing.non_max_suppression( 237 | boxes, max_bbox_overlap, scores) 238 | self.detections[frame_idx] = [detections[i] for i in indices] 239 | 240 | def first_frame_idx(self): 241 | return min(self.bgr_filenames.keys()) 242 | 243 | def last_frame_idx(self): 244 | return max(self.bgr_filenames.keys()) 245 | 246 | @property 247 | def update_ms(self): 248 | return CAMERA_UPDATE_IN_MS 249 | 250 | def read_frame_data(self, frame_idx): 251 | bgr_image = cv2.imread(self.bgr_filenames[frame_idx], cv2.IMREAD_COLOR) 252 | frame_data = { 253 | "bgr_image": bgr_image, 254 | "detections": self.detections.get(frame_idx, []), 255 | "ground_truth": self.ground_truth, 256 | "timestamp": float(frame_idx) * CAMERA_UPDATE_IN_MS / 1000., 257 | "projection_matrix": self.projection_matrix} 258 | return frame_data 259 | 260 | 261 | class Devkit(object): 262 | 263 | def __init__(self, dataset_dir): 264 | self.sequences = TRAIN_SEQUENCES + TEST_SEQUENCES 265 | self.dataset_dir = dataset_dir 266 | 267 | def download_data(self, base_url=None): 268 | if base_url is None: 269 | base_url = "ftp://ftp.cs.rdg.ac.uk/pub/PETS2009/" + \ 270 | "Crowd_PETS09_dataset/a_data/Crowd_PETS09/" 271 | print("Download and extract data.") 272 | self._download_extract_data_if(base_url) 273 | print("Download and extract calibration.") 274 | self._download_extract_calibration_if(base_url) 275 | print("Extracting tracking data.") 276 | self._download_tracking_data_if() 277 | print("Done with download and extracting.") 278 | 279 | def create_data_source( 280 | self, sequence, cropped=False, extrinsic_scale=1. 
/ 1000): 281 | if sequence not in self.sequences: 282 | raise KeyError("Unknown sequence '%s'" % sequence) 283 | 284 | projection_matrix = create_projection_matrix( 285 | os.path.join(self.calibration_dir, "View_001.xml"), 286 | extrinsic_scale) 287 | 288 | base_dir = self.get_dataset_dir(sequence) 289 | image_dir = os.path.join(base_dir, "View_001") 290 | bgr_filenames = {} 291 | for filename in os.listdir(image_dir): 292 | frame_idx = int(filename.replace('.', '_').split('_')[1]) 293 | bgr_filenames[frame_idx] = os.path.join(image_dir, filename) 294 | 295 | data_dir = self.get_tracking_data_dir(sequence) 296 | if cropped: 297 | groundtruth_file = os.path.join( 298 | data_dir, "PETS2009-%s-cropped.xml" % sequence) 299 | else: 300 | groundtruth_file = os.path.join( 301 | data_dir, "PETS2009-%s.xml" % sequence) 302 | ground_truth = read_cvml_groundtruth( 303 | groundtruth_file, projection_matrix) 304 | 305 | detections_file = os.path.join( 306 | data_dir, "PETS2009-%s-c1-det.xml" % sequence) 307 | detections = read_cvml_detections(detections_file, projection_matrix) 308 | 309 | return DataSource( 310 | projection_matrix, bgr_filenames, ground_truth, detections, 311 | sequence) 312 | 313 | @property 314 | def calibration_dir(self): 315 | return os.path.join(self.dataset_dir, "Calibration") 316 | 317 | def get_dataset_dir(self, sequence): 318 | return os.path.join(self.dataset_dir, sequence) 319 | 320 | def get_tracking_data_dir(self, sequence): 321 | return os.path.join(self.get_dataset_dir(sequence), "Tracking_Data") 322 | 323 | def _download_extract_data_if(self, base_url): 324 | datasets = [ 325 | "S1_L1.tar.bz2", "S1_L2.tar.bz2", "S1_L3.tar.bz2", "S2_L1.tar.bz2", 326 | "S2_L2.tar.bz2", "S2_L3.tar.bz2"] 327 | sha1_sums = [ 328 | "2a15a1f8f81384499081c032ad0ca3bb7e7b88e9", 329 | "cbd4a825500a4994f1c2ddbf7b4f4dd0ae9493a1", 330 | "26a26bc7779b88ad9f41b3e672ad44967010176c", 331 | "ea01601147245f66ea03c82f6b40f98a130441ed", 332 | "c1aaf3559ba758bee68aa572b798ff64a0eeb076", 333 | "7be2e22b4d8fa44186c4bcfd26eb32e7d299cd72"] 334 | 335 | if not os.path.isdir(self.dataset_dir): 336 | os.mkdir(self.dataset_dir) 337 | 338 | for dataset, sha1_sum in zip(datasets, sha1_sums): 339 | url = os.path.join(base_url, dataset) 340 | filename = os.path.join(self.dataset_dir, dataset) 341 | self._download_if(url, filename, sha1_sum) 342 | self._extract_if(filename) 343 | 344 | def _download_extract_calibration_if(self, base_url): 345 | if os.path.isdir(self.calibration_dir): 346 | return 347 | calibration_file = "Calibrationxmls.tar" 348 | calibration_sha1 = "8d1d21a5e832f751150a57c23716bb39dc70043c" 349 | self._download_if( 350 | os.path.join(base_url, calibration_file), 351 | os.path.join(self.dataset_dir, calibration_file), calibration_sha1) 352 | 353 | print("Extracting calibration") 354 | tar = tarfile.open(os.path.join(self.dataset_dir, calibration_file)) 355 | tar.extractall(self.calibration_dir) 356 | tar.close() 357 | print("Patching XML files") 358 | for filename in os.listdir(self.calibration_dir): 359 | filename = os.path.join(self.calibration_dir, filename) 360 | subprocess.call(["sed", "-i", "s/dpx\"/dpx=\"/g", filename]) 361 | subprocess.call(["sed", "-i", "s/dpy\"/dpy=\"/g", filename]) 362 | print("Done.") 363 | 364 | def _download_tracking_data_if(self): 365 | 366 | def download_gt_if(seq, filename): 367 | path = os.path.join(self.get_tracking_data_dir(seq), filename) 368 | if os.path.isfile(path): 369 | return 370 | base_url = "http://www.milanton.de/files/gt/PETS2009/" 371 | print("Downloading 
%s" % os.path.join(base_url, filename)) 372 | urllib.request.urlretrieve(os.path.join(base_url, filename), path) 373 | print("Done.") 374 | 375 | def download_det_if(seq, filename): 376 | path = os.path.join(self.get_tracking_data_dir(seq), filename) 377 | if os.path.isfile(path): 378 | return 379 | base_url = "http://www.milanton.de/files/det/PETS2009/" 380 | print("Downloading %s" % os.path.join(base_url, filename)) 381 | urllib.request.urlretrieve(os.path.join(base_url, filename), path) 382 | print("Done.") 383 | 384 | for sequence in self.sequences: 385 | os.makedirs(self.get_tracking_data_dir(sequence), exist_ok=True) 386 | download_gt_if(sequence, "PETS2009-%s.xml" % sequence) 387 | download_gt_if(sequence, "PETS2009-%s-cropped.xml" % sequence) 388 | download_det_if(sequence, "PETS2009-%s-c1-det.xml" % sequence) 389 | 390 | def _download_if(self, url, filename, sha1_sum): 391 | if os.path.isfile(filename): 392 | with open(filename, "rb") as file: 393 | d = hashlib.sha1() 394 | for buf in iter(partial(file.read, 128), b''): 395 | d.update(buf) 396 | if d.hexdigest() == sha1_sum: 397 | return 398 | print("Downloading %s" % url) 399 | urllib.request.urlretrieve( 400 | url, os.path.join(self.dataset_dir, filename)) 401 | print("Done.") 402 | 403 | def _extract_if(self, filename): 404 | print("Extracting %s" % filename) 405 | tmpdir = os.path.join(self.dataset_dir, "tmp") 406 | tar = tarfile.open(filename) 407 | tar.extractall(tmpdir) 408 | tar.close() 409 | 410 | # get destination directory 411 | dataset_dir = os.path.basename(filename).replace("_", "").split(".")[0] 412 | dest_dir = os.path.join(self.dataset_dir, dataset_dir) 413 | if os.path.isdir(dataset_dir) or len( 414 | glob.glob(dataset_dir + "-*")) > 0: 415 | # glob, because of possible suffix, e.g., S1_L1-1 416 | return 417 | 418 | # get directory of extracted data 419 | scontainer = os.path.join(tmpdir, "Crowd_PETS09") 420 | spath = glob.glob(os.path.join(scontainer, "*"))[0] 421 | sname = os.path.basename(spath) 422 | 423 | lcontainer = os.path.join(scontainer, sname) 424 | lpath = glob.glob(os.path.join(lcontainer, "*"))[0] 425 | lname = os.path.basename(lpath) 426 | 427 | tcontainer = os.path.join(lcontainer, lname) 428 | tnames = glob.glob(os.path.join(tcontainer, "*")) 429 | 430 | print("Copying files") 431 | for i, tname in enumerate(sorted(tnames)): 432 | views = sorted(os.listdir(tname)) 433 | for view in views: 434 | source_dir = os.path.join(tname, view) 435 | this_dest_dir = ( 436 | dest_dir 437 | if len(tnames) == 1 else "%s-%d" % (dest_dir, 1 + i)) 438 | this_dest_dir = os.path.join(this_dest_dir, view) 439 | os.makedirs(this_dest_dir, exist_ok=True) 440 | 441 | filenames = glob.glob(os.path.join(source_dir, "*.jpg")) 442 | for filename in filenames: 443 | shutil.copyfile( 444 | filename, 445 | os.path.join( 446 | this_dest_dir, os.path.basename(filename))) 447 | 448 | print("Removing temporary files") 449 | shutil.rmtree(tmpdir) 450 | print("Done.") 451 | -------------------------------------------------------------------------------- /pymotutils/contrib/detection/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /pymotutils/contrib/detection/tensorflow_object_detection_api.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains code to run models from the 
TensorFlow detection model 4 | zoo [1]. 5 | 6 | [1] https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md 7 | """ 8 | import cv2 9 | import numpy as np 10 | import tensorflow as tf 11 | import pymotutils 12 | 13 | 14 | class Detector(object): 15 | """ 16 | A thin wrapper class around the TensorFlow Object Detection inference 17 | API. 18 | 19 | Parameters 20 | ---------- 21 | inference_graph_pb : str 22 | Path to the frozen_inference_graph.pb file. This file is contained in 23 | the model archive. 24 | 25 | """ 26 | 27 | def __init__(self, inference_graph_pb): 28 | self._detection_graph = tf.Graph() 29 | with self._detection_graph.as_default(): 30 | graph_def = tf.GraphDef() 31 | with tf.gfile.GFile(inference_graph_pb, "rb") as file_handle: 32 | serialized_graph = file_handle.read() 33 | graph_def.ParseFromString(serialized_graph) 34 | tf.import_graph_def(graph_def, name="") 35 | 36 | self._image_tensor = self._detection_graph.get_tensor_by_name( 37 | "image_tensor:0") 38 | self._detection_boxes = self._detection_graph.get_tensor_by_name( 39 | "detection_boxes:0") 40 | self._detection_scores = self._detection_graph.get_tensor_by_name( 41 | "detection_scores:0") 42 | self._detection_classes = self._detection_graph.get_tensor_by_name( 43 | "detection_classes:0") 44 | self._session = tf.Session(graph=self._detection_graph) 45 | 46 | def run(self, bgr_image, min_confidence=0.5, max_bbox_overlap=0.7): 47 | """Run object detector on single image. 48 | 49 | Parameter 50 | --------- 51 | bgr_image : ndarray 52 | Input image in BGR color space. 53 | min_confidence : float 54 | Minimum detector confidence in [0, 1]. Detections with confidence 55 | lower than this value are suppressed. 56 | max_bbox_overlap : float 57 | Non-maxima suppression threshold in [0, 1]. A large value 58 | reduces the number of returned detections. 59 | 60 | Returns 61 | ------- 62 | (ndarray, ndarray, ndarray) 63 | Returns a tuple containing the following elements: 64 | * An array of shape (N, 4) which contains the bounding boxes of 65 | N object detections in format (top-left-x, top-left-y, width, 66 | height). 67 | * An array of shape (N, ) which contains the corresponding detector 68 | confidence score. 69 | * An array of shape (N, ) which contains the corresponding class 70 | label (integer-valued). 71 | 72 | """ 73 | rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) 74 | with self._detection_graph.as_default(): 75 | boxes, scores, classes = self._session.run([ 76 | self._detection_boxes, self._detection_scores, 77 | self._detection_classes], feed_dict={ 78 | self._image_tensor: rgb_image[np.newaxis, :, :, :]}) 79 | boxes, scores, classes = ( 80 | boxes[0], scores[0], classes[0].astype(np.int32)) 81 | 82 | keep = np.greater_equal(scores, min_confidence) 83 | boxes, scores, classes = boxes[keep], scores[keep], classes[keep] 84 | 85 | # Convert to (x, y, width, height). 
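        # The detection graph returns boxes as normalized
        # [ymin, xmin, ymax, xmax]: scale by (height, width), turn the second
        # pair into (height, width) extents, then swap columns to obtain
        # (top-left-x, top-left-y, width, height).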
86 | boxes[:, :2] *= np.asarray(bgr_image.shape[:2]) 87 | boxes[:, 2:] *= np.asarray(bgr_image.shape[:2]) 88 | boxes[:, 2:] -= boxes[:, :2] 89 | boxes[:, [0, 1, 2, 3]] = boxes[:, [1, 0, 3, 2]] 90 | 91 | keep = pymotutils.preprocessing.non_max_suppression( 92 | boxes, max_bbox_overlap, scores) 93 | boxes, scores, classes = boxes[keep], scores[keep], classes[keep] 94 | return boxes, scores, classes 95 | 96 | 97 | def generate_detections( 98 | index_to_bgr_filenames, inference_graph_pb, class_to_name, 99 | min_confidence=0.5, max_bbox_overlap=0.7, verbose=False): 100 | """Generate detections from list of image filenames. 101 | 102 | Parameters 103 | ---------- 104 | index_to_bgr_filenames: Dict[int, str] 105 | Maps from frame index to image filename. The frame index is used to 106 | populate the RegionOfInterestDetection.frame_idx attribute. 107 | inference_graph_pb : str 108 | Path to the frozen_inference_graph.pb file. This file is contained in 109 | the model archive. 110 | class_to_name : Dict[int, str] 111 | A dictionary that maps from label to class name. Classes that are not 112 | contained in the dictionary are suppressed. Use MSCOCO_LABELMAP for 113 | networks trained on MSCOCO. 114 | min_confidence : float 115 | Minimum detector confidence in [0, 1]. Detections with confidence 116 | lower than this value are suppressed. 117 | max_bbox_overlap : float 118 | Non-maxima suppression threshold in [0, 1]. A large value 119 | reduces the number of returned detections. 120 | verbose : bool 121 | If True, prints status information about the number of processed frames 122 | to standard output. 123 | 124 | Returns 125 | ------- 126 | Dict[int, List[pymotutils.RegionOfInterestDetection]] 127 | Returns a dictionary that maps from frame index to list of detections. 128 | 129 | """ 130 | detector = Detector(inference_graph_pb) 131 | detections = dict() 132 | 133 | num_processed = 0 134 | for frame_idx, filename in sorted(list(index_to_bgr_filenames.items())): 135 | if verbose: 136 | print( 137 | "Processing detection on frame %d out of %d" % 138 | (num_processed, len(index_to_bgr_filenames))) 139 | num_processed += 1 140 | bgr_image = cv2.imread(filename, cv2.IMREAD_COLOR) 141 | boxes, scores, classes = detector.run( 142 | bgr_image, min_confidence, max_bbox_overlap) 143 | 144 | keep = [i for i in range(len(boxes)) if classes[i] in class_to_name] 145 | boxes, scores, classes = boxes[keep], scores[keep], classes[keep] 146 | class_names = [class_to_name[x] for x in classes] 147 | 148 | detections[frame_idx] = { 149 | pymotutils.RegionOfInterestDetection( 150 | frame_idx, boxes[i], scores[i], class_label=classes[i], 151 | class_name=class_names[i]) for i in range(len(boxes))} 152 | return detections 153 | 154 | 155 | """ 156 | This dictionary provides the mapping from class ID to display_name for networks 157 | trained on MSCOCO. 
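Some label IDs (for example 12, 26, 29 and 30) are unused in the official
MSCOCO label map, so the mapping below is intentionally non-contiguous.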
158 | """ 159 | MSCOCO_LABELMAP = { 160 | 1: "person", 161 | 2: "bicycle", 162 | 3: "car", 163 | 4: "motorcycle", 164 | 5: "airplane", 165 | 6: "bus", 166 | 7: "train", 167 | 8: "truck", 168 | 9: "boat", 169 | 10: "traffic light", 170 | 11: "fire hydrant", 171 | 13: "stop sign", 172 | 14: "parking meter", 173 | 15: "bench", 174 | 16: "bird", 175 | 17: "cat", 176 | 18: "dog", 177 | 19: "horse", 178 | 20: "sheep", 179 | 21: "cow", 180 | 22: "elephant", 181 | 23: "bear", 182 | 24: "zebra", 183 | 25: "giraffe", 184 | 27: "backpack", 185 | 28: "umbrella", 186 | 31: "handbag", 187 | 32: "tie", 188 | 33: "suitcase", 189 | 34: "frisbee", 190 | 35: "skis", 191 | 36: "snowboard", 192 | 37: "sports ball", 193 | 38: "kite", 194 | 39: "baseball bat", 195 | 40: "baseball glove", 196 | 41: "skateboard", 197 | 42: "surfboard", 198 | 43: "tennis racket", 199 | 44: "bottle", 200 | 46: "wine glass", 201 | 47: "cup", 202 | 48: "fork", 203 | 49: "knife", 204 | 50: "spoon", 205 | 51: "bowl", 206 | 52: "banana", 207 | 53: "apple", 208 | 54: "sandwich", 209 | 55: "orange", 210 | 56: "broccoli", 211 | 57: "carrot", 212 | 58: "hot dog", 213 | 59: "pizza", 214 | 60: "donut", 215 | 61: "cake", 216 | 62: "chair", 217 | 63: "couch", 218 | 64: "potted plant", 219 | 65: "bed", 220 | 67: "dining table", 221 | 70: "toilet", 222 | 72: "tv", 223 | 73: "laptop", 224 | 74: "mouse", 225 | 75: "remote", 226 | 76: "keyboard", 227 | 77: "cell phone", 228 | 78: "microwave", 229 | 79: "oven", 230 | 80: "toaster", 231 | 81: "sink", 232 | 82: "refrigerator", 233 | 84: "book", 234 | 85: "clock", 235 | 86: "vase", 236 | 87: "scissors", 237 | 88: "teddy bear", 238 | 89: "hair drier", 239 | 90: "toothbrush"} 240 | 241 | """ 242 | This dictionary provides the mapping from class ID to display_name for networks 243 | trained on KITTI. 244 | """ 245 | KITTI_LABELMAP = { 1: "car", 2: "pedestrian" } 246 | -------------------------------------------------------------------------------- /pymotutils/io/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /pymotutils/io/detrac_io.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains helper functions to read and write dataset structures in a 4 | way that is compatible with the DETRAC-toolkit [1]_. 5 | 6 | .. [1] http://detrac-db.rit.albany.edu/ 7 | """ 8 | import numpy as np 9 | import xml.etree.ElementTree as ElementTree 10 | import os 11 | from six import itervalues 12 | 13 | import pymotutils 14 | 15 | 16 | def read_detections(filename, min_confidence=None): 17 | """Read detection file. 18 | 19 | Parameters 20 | ---------- 21 | filename : str 22 | Path to the detection file. 23 | min_confidence : Optional[float] 24 | A detector confidence threshold. Detections with lower confidence are 25 | disregarded. 26 | 27 | Returns 28 | ------- 29 | Dict[int, List[MonoDetection]] 30 | This function returns a dictionary that maps frame indices to a list 31 | of detections in that frame. 
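        Only columns 0 (frame index), 2-5 (bounding box as left, top, width,
        height) and 6 (confidence) of the comma-separated input are used; any
        remaining columns are ignored.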
32 | 33 | """ 34 | # format: frame id, bbox (x, y, w, h), confidence 35 | data = np.loadtxt(filename, delimiter=',') 36 | min_frame_idx = int(data[:, 0].min()) 37 | max_frame_idx = int(data[:, 0].max()) 38 | detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)} 39 | for row in data: 40 | confidence = row[6] 41 | if min_confidence is not None and confidence < min_confidence: 42 | continue 43 | frame_idx, roi = int(row[0]), row[2:6] 44 | detections[frame_idx].append( 45 | pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence)) 46 | return detections 47 | 48 | 49 | def read_groundtruth(filename): 50 | """Read ground truth data. 51 | 52 | Parameters 53 | ---------- 54 | filename : str 55 | Path to the ground truth file. 56 | 57 | Returns 58 | ------- 59 | dataset.TrackSet 60 | Returns the tracking ground truth. However the ground truth file 61 | contains other useful information, the sensor_data contains only the ROI 62 | [left, top, width, height]. 63 | 64 | """ 65 | if not os.path.isfile(filename): 66 | return pymotutils.TrackSet() 67 | tree = ElementTree.parse(filename) 68 | 69 | ground_truth = pymotutils.TrackSet() 70 | sequence = tree.getroot() 71 | for frame in sequence.iter('frame'): 72 | for target in frame.iter('target'): 73 | frame_idx = int(frame.get('num')) 74 | track_id = int(target.get('id')) 75 | box = target.find('box') 76 | sensor_data = np.asarray([ 77 | float(box.get('left')), 78 | float(box.get('top')), 79 | float(box.get('width')), 80 | float(box.get('height'))]) 81 | 82 | if track_id not in ground_truth.tracks: 83 | ground_truth.create_track(track_id) 84 | ground_truth.tracks[track_id].add( 85 | pymotutils.Detection(frame_idx, sensor_data)) 86 | return ground_truth 87 | 88 | 89 | def write_hypotheses(foldername, sequence_name, track_set, speed=25.0): 90 | """Write track hypotheses (tracking output) to files. 91 | 92 | The DETRAC toolkit expect tracking result in 5 separate files per sequence. 93 | More info here: [1]_. 94 | 95 | .. [1] http://detrac-db.rit.albany.edu/instructions 96 | 97 | Parameters 98 | ---------- 99 | foldername : str 100 | Path to the folder to store CSVs. 101 | sequence_name : str 102 | Name of the current sequence. The DETRAC toolkit expects all results in 103 | a single folder, and uses this name as a prefix to separate sequences. 104 | track_set : dataset.TrackSet 105 | The set of track hypotheses (tracking output), where sensor_data 106 | contains the object's region of interest (ROI). 107 | speed : Optional[float] 108 | Running speed of the tracker in frame per sec (FPS). If not specified, a 109 | a dummy value will be used. 
110 | 111 | Returns 112 | ------- 113 | 114 | """ 115 | num_of_frames = track_set.last_frame_idx() 116 | track_array = np.zeros((4, num_of_frames, len(track_set.tracks))) 117 | for (track_idx, track_id) in enumerate(track_set.tracks): 118 | for obj in itervalues(track_set.tracks[track_id].detections): 119 | track_array[:, obj.frame_idx - 1, track_idx] = obj.sensor_data[0:4] 120 | 121 | for i, suffix in enumerate(['_LX.txt', '_LY.txt', '_W.txt', '_H.txt']): 122 | np.savetxt( 123 | os.path.join(foldername, sequence_name + suffix), track_array[i], 124 | fmt='%.3g', delimiter=',') 125 | 126 | np.savetxt( 127 | os.path.join(foldername, sequence_name + '_Speed.txt'), 128 | [speed], fmt='%.5f') 129 | 130 | 131 | def write_groundtruth(): 132 | raise NotImplementedError() 133 | -------------------------------------------------------------------------------- /pymotutils/io/motchallenge_io.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains helper functions to write dataset structures to a simple 4 | CSV file format that is compatible with the MOT challenge SDK [1]_. 5 | 6 | The MOT challenge is a multiple object tracking benchmark that aims to establish 7 | standardized multiple object tracking evaluation on wide ringe of datasets. 8 | 9 | .. [1] https://motchallenge.net/ 10 | """ 11 | import os 12 | import numpy as np 13 | 14 | import pymotutils 15 | 16 | 17 | def read_detections(filename, min_confidence=None): 18 | """Read detection file. 19 | 20 | Parameters 21 | ---------- 22 | filename : str 23 | Path to the detection file. 24 | min_confidence : Optional[float] 25 | A detector confidence threshold. Detections with lower confidence are 26 | disregarded. 27 | 28 | Returns 29 | ------- 30 | Dict[int, List[MonoDetection]] 31 | This function returns a dictionary that maps frame indices to a list 32 | of detections in that frame. If the detection file contains 3D 33 | positions, these will be used as sensor_data. Otherwise, the sensor_data 34 | is set to the detection's region of interest (ROI). 35 | 36 | """ 37 | # format: frame id, track id, bbox (x, y, w, h), confidence, world (x, y, z) 38 | # track id is always -1 39 | data = np.loadtxt(filename, delimiter=',') 40 | has_threed = np.any(data[:, 7:10] != -1) 41 | min_frame_idx = int(data[:, 0].min()) 42 | max_frame_idx = int(data[:, 0].max()) 43 | detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)} 44 | for row in data: 45 | confidence = row[6] 46 | if min_confidence is not None and row[6] < min_confidence: 47 | continue 48 | frame_idx, roi = int(row[0]), row[2:6] 49 | xyz = row[7:10] if has_threed else None 50 | detections[frame_idx].append( 51 | pymotutils.RegionOfInterestDetection( 52 | frame_idx, roi, confidence, xyz=xyz)) 53 | return detections 54 | 55 | 56 | def read_groundtruth(filename, sensor_data_is_3d=False): 57 | """Read ground truth file. 58 | 59 | Parameters 60 | ---------- 61 | filename : str 62 | Path to the ground truth file. 63 | sensor_data_is_3d : bool 64 | If True, the ground truth's sensor data is set to the 3D position. 65 | If False, the ground truth's sensor data is set to the region of 66 | interest (ROI). 67 | 68 | Note that not all of the sequences provided by the MOT challenge contain 69 | valid 3D positions. 70 | 71 | Returns 72 | ------- 73 | TrackSet 74 | Returns the tracking ground truth. If sensor_data_is_3d is True, the 75 | sensor data contains the 3D position. 
Otherwise, sensor_data 76 | is set to the region of interest (ROI). 77 | 78 | """ 79 | # format: frame id, track id, bbox (x, y, w, h), care_flag, world (x, y, z) 80 | if not os.path.isfile(filename): 81 | return pymotutils.TrackSet() 82 | data = np.loadtxt(filename, delimiter=',') 83 | 84 | has_threed = np.any(data[:, 7:10] != -1) 85 | if sensor_data_is_3d and not has_threed: 86 | raise RuntimeError("File does not contain valid 3D coordinates") 87 | 88 | ground_truth = pymotutils.TrackSet() 89 | for row in data: 90 | frame_idx, track_id = int(row[0]), int(row[1]) 91 | do_not_care = row[6] == 0 92 | if sensor_data_is_3d: 93 | sensor_data = row[7:10] 94 | else: 95 | sensor_data = row[2:6] 96 | if track_id not in ground_truth.tracks: 97 | ground_truth.create_track(track_id) 98 | ground_truth.tracks[track_id].add( 99 | pymotutils.Detection(frame_idx, sensor_data, do_not_care)) 100 | return ground_truth 101 | 102 | 103 | def write_hypotheses(filename, track_set_2d=None, track_set_3d=None): 104 | """Write track hypotheses (tracking output) to file. 105 | 106 | This function supports writing of track hypotheses files compatible with 107 | 2D or 3D evaluation, or both. In the 2D case, the track set should contain 108 | the image region of interest (ROI). In the 3D case, the track set should 109 | contain the 3D position in the tracking frame. 110 | 111 | Note that the MOT challenge devkit requires that sequences startat index 1. 112 | This function will automatically correct the index accordingly. 113 | 114 | Parameters 115 | ---------- 116 | filename : str 117 | Name of the file to write to (file format will be CSV). 118 | track_set_2d : Optional[TrackSet] 119 | The set of track hypotheses (tracking output), where sensor_data 120 | contains the object's region of interest (ROI). 121 | track_set_3d : Optional[TrackSet] 122 | The set of track hypotheses (tracking output), where sensor_data 123 | contains the object's 3D position. 124 | 125 | """ 126 | ref_set = track_set_2d if track_set_2d is not None else track_set_3d 127 | offset = 1 - ref_set.first_frame_idx() 128 | 129 | csvfile = open(filename, "w") 130 | for frame_idx in ref_set.frame_range(): 131 | if track_set_2d is not None: 132 | data_2d = track_set_2d.collect_sensor_data(frame_idx) 133 | else: 134 | data_2d = {} 135 | if track_set_3d is not None: 136 | data_3d = track_set_3d.collect_sensor_data(frame_idx) 137 | else: 138 | data_3d = {} 139 | track_ids = set(data_2d.keys()) | set(data_3d.keys()) 140 | for track_id in track_ids: 141 | if track_id in data_2d: 142 | bbox = list(data_2d[track_id]) 143 | else: 144 | bbox = [-1, -1, -1, -1] 145 | if track_id in data_3d: 146 | world = list(data_3d[track_id]) 147 | else: 148 | world = [-1, -1, -1] 149 | row = [frame_idx + offset, track_id] + bbox + [-1] + world 150 | csvfile.writelines(",".join(str(x) for x in row) + os.linesep) 151 | csvfile.close() 152 | 153 | 154 | def write_groundtruth(filename, track_set_2d=None, track_set_3d=None): 155 | """Write ground truth data to file. 156 | 157 | This function supports writing of ground truth files compatible with 158 | 2D or 3D evaluation, or both. In the 2D case, the track set should contain 159 | the image region of interest (ROI). In the 3D case, the track set should 160 | contain the 3D position in the tracking frame. 161 | 162 | Note that the MOT challenge devkit requires that sequences startat index 1. 163 | This function will automatically correct the index accordingly. 
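
    Each output row has the format: frame index, track id, bbox (x, y, w, h),
    care flag, world (x, y, z); entries that are unavailable are written as -1.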
164 | 165 | Parameters 166 | ---------- 167 | filename : str 168 | Name of the file to write to (file format will be CSV) 169 | track_set_2d : Optional[TrackSet] 170 | The set of ground truth tracks, where sensor_data contains the 171 | image region of interest (ROI). 172 | track_set_3d : Optional[TrackSet] 173 | The set of ground truth tracks, where sensor_data contains the 174 | objects' 3D position. 175 | 176 | """ 177 | ref_set = track_set_2d if track_set_2d is not None else track_set_3d 178 | offset = 1 - ref_set.first_frame_idx() 179 | 180 | csvfile = open(filename, "w") 181 | for frame_idx in ref_set.frame_range(): 182 | if track_set_2d is not None: 183 | data_2d = track_set_2d.collect_detections(frame_idx) 184 | else: 185 | data_2d = {} 186 | if track_set_3d is not None: 187 | data_3d = track_set_3d.collect_detections(frame_idx) 188 | else: 189 | data_3d = {} 190 | track_ids = set(data_2d.keys()) | set(data_3d.keys()) 191 | for track_id in track_ids: 192 | care_flag = True 193 | if track_id in data_2d: 194 | bbox = list(data_2d[track_id].sensor_data) 195 | care_flag = care_flag and not data_2d[track_id].do_not_care 196 | else: 197 | bbox = [-1, -1, -1, -1] 198 | if track_id in data_3d: 199 | world = list(data_3d[track_id].sensor_data) 200 | care_flag = care_flag and not data_3d[track_id].do_not_care 201 | else: 202 | world = [-1, -1, -1] 203 | 204 | care_int = 1 if care_flag else 0 205 | row = [frame_idx + offset, track_id] + bbox + [care_int] + world 206 | csvfile.writelines(",".join(str(x) for x in row) + os.linesep) 207 | csvfile.close() 208 | -------------------------------------------------------------------------------- /pymotutils/io/pymot_io.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains helper functions to write dataset structures to a JSON 4 | file format that is compatible with pymot [1]_. 5 | 6 | Pymot is an open source tool for evaluation of multiple object tracking 7 | performance using CLEAR MOT metrics. 8 | 9 | .. [1] https://github.com/Videmo/pymot 10 | """ 11 | import json 12 | import pymotutils 13 | 14 | 15 | def write_groundtruth(filename, track_set): 16 | """Write ground truth data to file. 17 | 18 | It is assumed that the sensor data contained in each of the tracks 19 | is a region of interest (x, y, width, height). 20 | 21 | Parameters 22 | ---------- 23 | filename : str 24 | Name of the file to write to (file format will be JSON). 25 | track_set : TrackSet 26 | The set of ground truth tracks. 
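
    The output is a JSON list holding a single "video" object whose "frames"
    entries contain the per-frame "annotations" records expected by pymot.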
27 | 28 | """ 29 | assert isinstance( 30 | track_set, pymotutils.TrackSet), "track_set is of wrong type" 31 | 32 | output = {"frames": []} 33 | for frame_id in track_set.frame_range(): 34 | frame = dict() 35 | frame["timestamp"] = frame_id 36 | frame["class"] = "frame" 37 | frame["annotations"] = [] 38 | 39 | detections = track_set.collect_detections(frame_id) 40 | for tag, detection in detections.items(): 41 | annotation = dict() 42 | annotation["dco"] = bool(detection.do_not_care) 43 | annotation["x"] = float(detection.sensor_data[0]) 44 | annotation["y"] = float(detection.sensor_data[1]) 45 | annotation["width"] = float(detection.sensor_data[2]) 46 | annotation["height"] = float(detection.sensor_data[3]) 47 | annotation["id"] = tag 48 | frame["annotations"].append(annotation) 49 | output["frames"].append(frame) 50 | output["class"] = "video" 51 | 52 | with open(filename, "w") as f: 53 | json.dump([output], f, indent=4, sort_keys=True) 54 | 55 | 56 | def write_hypotheses(filename, track_set): 57 | """Write track hypotheses (tracking output) to file. 58 | 59 | It is assumed that the sensor data contained in each of the tracks 60 | is a region of interest (x, y, width, height). 61 | 62 | Parameters 63 | ---------- 64 | filename : str 65 | Name of the file to write to (file format will be JSON). 66 | track_set : TrackSet 67 | The set of track hypotheses (tracking output). 68 | 69 | """ 70 | assert isinstance( 71 | track_set, pymotutils.TrackSet), "track_set is of wrong type" 72 | 73 | output = {"frames": []} 74 | for frame_id in track_set.frame_range(): 75 | frame = dict() 76 | frame["timestamp"] = frame_id 77 | frame["class"] = "frame" 78 | frame["hypotheses"] = [] 79 | 80 | detections = track_set.collect_detections(frame_id) 81 | for tag, detection in detections.items(): 82 | hypothesis = dict() 83 | hypothesis["x"] = float(detection.sensor_data[0]) 84 | hypothesis["y"] = float(detection.sensor_data[1]) 85 | hypothesis["width"] = float(detection.sensor_data[2]) 86 | hypothesis["height"] = float(detection.sensor_data[3]) 87 | hypothesis["id"] = tag 88 | frame["hypotheses"].append(hypothesis) 89 | output["frames"].append(frame) 90 | output["class"] = "video" 91 | 92 | with open(filename, "w") as f: 93 | json.dump([output], f, indent=4, sort_keys=True) 94 | -------------------------------------------------------------------------------- /pymotutils/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /pymotutils/visualization/opencv.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | """ 3 | This module contains an image viewer and drawing routines based on OpenCV. 4 | """ 5 | import numpy as np 6 | import cv2 7 | import time 8 | import pymotutils 9 | 10 | 11 | def is_in_bounds(mat, roi): 12 | """Check if ROI is fully contained in the image. 13 | 14 | Parameters 15 | ---------- 16 | mat : ndarray 17 | An ndarray of ndim>=2. 18 | roi : (int, int, int, int) 19 | Region of interest (x, y, width, height) where (x, y) is the top-left 20 | corner. 21 | 22 | Returns 23 | ------- 24 | bool 25 | Returns true if the ROI is contain in mat. 
26 | 27 | """ 28 | if roi[0] < 0 or roi[0] + roi[2] >= mat.shape[1]: 29 | return False 30 | if roi[1] < 0 or roi[1] + roi[3] >= mat.shape[0]: 31 | return False 32 | return True 33 | 34 | 35 | def view_roi(mat, roi): 36 | """Get sub-array. 37 | 38 | The ROI must be valid, i.e., fully contained in the image. 39 | 40 | Parameters 41 | ---------- 42 | mat : ndarray 43 | An ndarray of ndim=2 or ndim=3. 44 | roi : (int, int, int, int) 45 | Region of interest (x, y, width, height) where (x, y) is the top-left 46 | corner. 47 | 48 | Returns 49 | ------- 50 | ndarray 51 | A view of the roi. 52 | 53 | """ 54 | sx, ex = roi[0], roi[0] + roi[2] 55 | sy, ey = roi[1], roi[1] + roi[3] 56 | if mat.ndim == 2: 57 | return mat[sy:ey, sx:ex] 58 | else: 59 | return mat[sy:ey, sx:ex, :] 60 | 61 | 62 | def copy_to(src, dst_roi, dst): 63 | """ Copy src to dst[roi]. 64 | 65 | The dst_roi must be fully contained in dst and of the same shape as 66 | the src image. 67 | 68 | Parameters 69 | ---------- 70 | src : ndarray 71 | Image that should be copied (ndim=2 or ndim=3). 72 | dst_roi : (int, int, int, int) 73 | Region of interest (x, y, width, height) where (x, y) is the top-left 74 | corner. 75 | dst : ndarray 76 | The target image of same ndim as src. 77 | 78 | """ 79 | sx, ex = dst_roi[0], dst_roi[0] + dst_roi[2] 80 | sy, ey = dst_roi[1], dst_roi[1] + dst_roi[3] 81 | if dst.ndim == 2: 82 | dst[sy:ey, sx:ex] = src 83 | else: 84 | dst[sy:ey, sx:ex, :] = src 85 | 86 | 87 | class ImageViewer(object): 88 | """An image viewer with drawing routines and video capture capabilities. 89 | 90 | Key Bindings: 91 | 92 | * 'SPACE' : pause 93 | * 'ESC' : quit 94 | 95 | Parameters 96 | ---------- 97 | update_ms : int 98 | Number of milliseconds between frames (1000 / frames per second). 99 | window_shape : (int, int) 100 | Shape of the window (width, height). 101 | caption : Optional[str] 102 | Title of the window. 103 | 104 | Attributes 105 | ---------- 106 | image : ndarray 107 | Color image of shape (height, width, 3). You may directly manipulate 108 | this image to change the view. Otherwise, you may call any of the 109 | drawing routines of this class. Internally, the image is treated as 110 | beeing in BGR color space. 111 | 112 | Note that the image is resized to the the image viewers window_shape 113 | just prior to visualization. Therefore, you may pass differently sized 114 | images and call drawing routines with the appropriate, original point 115 | coordinates. 116 | color : (int, int, int) 117 | Current BGR color code that applies to all drawing routines. 118 | Values are in range [0-255]. 119 | text_color : (int, int, int) 120 | Current BGR text color code that applies to all text rendering routines. 121 | Values are in range [0-255]. 122 | thickness : int 123 | Stroke width in pixels that applies to all drawing routines. 
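
    Examples
    --------
    A minimal sketch of the drawing API, using only attributes and drawing
    routines defined in this class:

    >>> viewer = ImageViewer(update_ms=40, window_shape=(640, 480))
    >>> viewer.color = (0, 255, 0)
    >>> viewer.thickness = 2
    >>> viewer.rectangle(20, 30, 100, 50, label="1")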
87 | class ImageViewer(object):
88 |     """An image viewer with drawing routines and video capture capabilities.
89 |
90 |     Key Bindings:
91 |
92 |     * 'SPACE' : pause
93 |     * 'ESC' : quit
94 |
95 |     Parameters
96 |     ----------
97 |     update_ms : int
98 |         Number of milliseconds between frames (1000 / frames per second).
99 |     window_shape : (int, int)
100 |         Shape of the window (width, height).
101 |     caption : Optional[str]
102 |         Title of the window.
103 |
104 |     Attributes
105 |     ----------
106 |     image : ndarray
107 |         Color image of shape (height, width, 3). You may directly manipulate
108 |         this image to change the view. Otherwise, you may call any of the
109 |         drawing routines of this class. Internally, the image is treated as
110 |         being in BGR color space.
111 |
112 |         Note that the image is resized to the image viewer's window_shape
113 |         just prior to visualization. Therefore, you may pass differently sized
114 |         images and call drawing routines with the appropriate, original point
115 |         coordinates.
116 |     color : (int, int, int)
117 |         Current BGR color code that applies to all drawing routines.
118 |         Values are in range [0-255].
119 |     text_color : (int, int, int)
120 |         Current BGR text color code that applies to all text rendering routines.
121 |         Values are in range [0-255].
122 |     thickness : int
123 |         Stroke width in pixels that applies to all drawing routines.
124 |
125 |     """
126 |
127 |     def __init__(self, update_ms, window_shape=(640, 480), caption="Figure 1"):
128 |         self._window_shape = window_shape
129 |         self._caption = caption
130 |         self._update_ms = update_ms
131 |         self._video_writer = None
132 |         self._user_fun = lambda: None
133 |         self._keypress_fun = lambda key: None
134 |         self._terminate = False
135 |
136 |         self.image = np.zeros(self._window_shape + (3, ), dtype=np.uint8)
137 |         self._color = (0, 0, 0)
138 |         self.text_color = (255, 255, 255)
139 |         self.thickness = 1
140 |
141 |     @property
142 |     def color(self):
143 |         return self._color
144 |
145 |     @color.setter
146 |     def color(self, value):
147 |         if len(value) != 3:
148 |             raise ValueError("color must be tuple of 3")
149 |         self._color = tuple(int(c) for c in value)
150 |
151 |     def rectangle(self, x, y, w, h, label=None, alpha=None):
152 |         """Draw a rectangle.
153 |
154 |         Parameters
155 |         ----------
156 |         x : float | int
157 |             Top left corner of the rectangle (x-axis).
158 |         y : float | int
159 |             Top left corner of the rectangle (y-axis).
160 |         w : float | int
161 |             Width of the rectangle.
162 |         h : float | int
163 |             Height of the rectangle.
164 |         label : Optional[str]
165 |             A text label that is placed at the top left corner of the rectangle.
166 |         alpha : Optional[float]
167 |             Transparency between 0 and 1.
168 |
169 |         """
170 |         if alpha is None:
171 |             pt1 = int(x), int(y)
172 |             pt2 = int(x + w), int(y + h)
173 |             cv2.rectangle(self.image, pt1, pt2, self._color, self.thickness)
174 |             if label is not None:
175 |                 text_size = cv2.getTextSize(
176 |                     label, cv2.FONT_HERSHEY_PLAIN, 1, self.thickness)
177 |
178 |                 center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
179 |                 pt2 = (
180 |                     pt1[0] + 10 + text_size[0][0],
181 |                     pt1[1] + 10 + text_size[0][1])
182 |                 cv2.rectangle(self.image, pt1, pt2, self._color, -1)
183 |                 cv2.putText(
184 |                     self.image, label, center, cv2.FONT_HERSHEY_PLAIN, 1,
185 |                     self.text_color, self.thickness)
186 |             return
187 |
188 |         padding = max(0, self.thickness)
189 |         roi = (
190 |             int(x - padding), int(y - padding), int(w + 2. * padding),
191 |             int(h + 2 * padding))
192 |         if not is_in_bounds(self.image, roi):
193 |             return
194 |
195 |         image_roi = view_roi(self.image, roi)
196 |         image = image_roi.copy()
197 |
198 |         pt1 = int(padding), int(padding)
199 |         pt2 = int(padding + w), int(padding + h)
200 |         cv2.rectangle(image, pt1, pt2, self._color, self.thickness)
201 |         if label is not None:
202 |             text_size = cv2.getTextSize(
203 |                 label, cv2.FONT_HERSHEY_PLAIN, 1, self.thickness)
204 |
205 |             center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
206 |             pt2 = pt1[0] + 10 + text_size[0][0], pt1[1] + 10 + text_size[0][1]
207 |             cv2.rectangle(image, pt1, pt2, self._color, -1)
208 |             cv2.putText(
209 |                 image, label, center, cv2.FONT_HERSHEY_PLAIN, 1,
210 |                 (255, 255, 255), self.thickness)
211 |
212 |         blended = cv2.addWeighted(image, alpha, image_roi, 1. - alpha, 0)
213 |         copy_to(blended, roi, self.image)
214 |
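When `alpha` is given, `rectangle` draws on a copy of the padded region and blends it back with `cv2.addWeighted`; if that padded region leaves the image, the box is silently skipped by the `is_in_bounds` check. A short usage sketch, assuming `viewer` is an ImageViewer and `(x, y, w, h)` comes from a detection:

    viewer.color = (0, 0, 255)                            # BGR: red
    viewer.thickness = 2
    viewer.rectangle(x, y, w, h, label="12")              # opaque box with id label
    viewer.rectangle(x, y, w, h, label="12", alpha=0.4)   # semi-transparent variant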
215 |     def circle(self, x, y, radius, label=None, alpha=None):
216 |         """Draw a circle.
217 |
218 |         Parameters
219 |         ----------
220 |         x : float | int
221 |             Center of the circle (x-axis).
222 |         y : float | int
223 |             Center of the circle (y-axis).
224 |         radius : float | int
225 |             Radius of the circle in pixels.
226 |         label : Optional[str]
227 |             A text label that is placed at the center of the circle.
228 |         alpha : Optional[float]
229 |             Transparency between 0 and 1.
230 |
231 |         """
232 |         image_size = int(radius + self.thickness + 1.5)  # actually half size
233 |         roi = (
234 |             int(x - image_size), int(y - image_size), int(2 * image_size),
235 |             int(2 * image_size))
236 |         if not is_in_bounds(self.image, roi):
237 |             return
238 |
239 |         image_roi = view_roi(self.image, roi)
240 |         image = image_roi if alpha is None else image_roi.copy()
241 |
242 |         center = image.shape[1] // 2, image.shape[0] // 2
243 |         cv2.circle(
244 |             image, center, int(radius + .5), self._color, self.thickness)
245 |         if label is not None:
246 |             cv2.putText(
247 |                 image, label, center, cv2.FONT_HERSHEY_PLAIN, 2,
248 |                 self.text_color, 2)
249 |
250 |         if alpha is not None:
251 |             blended = cv2.addWeighted(image, alpha, image_roi, 1. - alpha, 0)
252 |             copy_to(blended, roi, self.image)
253 |
254 |     def arrow(self, start, end):
255 |         """Draw arrow from start to end.
256 |
257 |         Parameters
258 |         ----------
259 |         start : array_like
260 |             Vector of length 2 which contains the arrow starting position.
261 |         end : array_like
262 |             Vector of length 2 which contains the arrow end position.
263 |
264 |         """
265 |         start = tuple(int(x) for x in start)
266 |         end = tuple(int(x) for x in end)
267 |         cv2.arrowedLine(self.image, start, end, self.color, self.thickness)
268 |
269 |     def gaussian(self, mean, covariance, alpha=None, label=None):
270 |         """Draw 95% confidence ellipse of a 2-D Gaussian distribution.
271 |
272 |         Parameters
273 |         ----------
274 |         mean : array_like
275 |             The mean vector of the Gaussian distribution (ndim=1).
276 |         covariance : array_like
277 |             The 2x2 covariance matrix of the Gaussian distribution.
278 |         alpha : Optional[float]
279 |             Transparency between 0 and 1.
280 |         label : Optional[str]
281 |             A text label that is placed at the center of the ellipse.
282 |
283 |         """
284 |         # chi2inv(0.95, 2) = 5.9915
285 |         vals, vecs = np.linalg.eigh(5.9915 * covariance)
286 |         indices = vals.argsort()[::-1]
287 |         vals, vecs = np.sqrt(vals[indices]), vecs[:, indices]
288 |         vals = np.clip(vals, 0, np.max(self.image.shape))
289 |
290 |         if alpha is None:
291 |             center = int(mean[0] + .5), int(mean[1] + .5)
292 |             axes = int(vals[0] + .5), int(vals[1] + .5)
293 |             angle = 180. * np.arctan2(vecs[1, 0], vecs[0, 0]) / np.pi
294 |             cv2.ellipse(
295 |                 self.image, center, axes, angle, 0, 360, self._color, 2)
296 |             if label is not None:
297 |                 cv2.putText(
298 |                     self.image, label, center, cv2.FONT_HERSHEY_PLAIN, 2,
299 |                     self.text_color, 2)
300 |             return
301 |
302 |         padding = max(0, self.thickness)
303 |         mini, maxi = mean - vals - padding, mean + vals + padding
304 |         roi = tuple(mini.astype(int)) + tuple((maxi - mini + 1.).astype(int))
305 |         if not is_in_bounds(self.image, roi):
306 |             return
307 |
308 |         image_roi = view_roi(self.image, roi)
309 |         image = image_roi.copy()
310 |
311 |         center = tuple((mean - mini).astype(int))
312 |         axes = int(vals[0] + .5), int(vals[1] + .5)
313 |         angle = int(180. * np.arctan2(vecs[1, 0], vecs[0, 0]) / np.pi)
314 |         cv2.ellipse(image, center, axes, angle, 0, 360, self._color, 2)
315 |         if label is not None:
316 |             cv2.putText(
317 |                 image, label, center, cv2.FONT_HERSHEY_PLAIN, 2,
318 |                 self.text_color, 2)
319 |
320 |         blended = cv2.addWeighted(image, alpha, image_roi, 1. - alpha, 0)
321 |         copy_to(blended, roi, self.image)
322 |
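The constant 5.9915 in `gaussian` is the 95% quantile of a chi-square distribution with two degrees of freedom, so the drawn ellipse covers roughly 95% of the probability mass; the ellipse axes are the square roots of the eigenvalues of the scaled covariance. A quick way to check the constant (scipy is already a dependency of this package):

    from scipy.stats import chi2
    print(chi2.ppf(0.95, df=2))   # ~5.9915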
323 |     def annotate(self, x, y, text):
324 |         """Draws a text string at a given location.
325 |
326 |         Parameters
327 |         ----------
328 |         x : int | float
329 |             Bottom-left corner of the text in the image (x-axis).
330 |         y : int | float
331 |             Bottom-left corner of the text in the image (y-axis).
332 |         text : str
333 |             The text to be drawn.
334 |
335 |         """
336 |         cv2.putText(
337 |             self.image, text, (int(x), int(y)), cv2.FONT_HERSHEY_PLAIN, 2,
338 |             self.text_color, 2)
339 |
340 |     def colored_points(self, points, colors=None, skip_index_check=False):
341 |         """Draw a collection of points.
342 |
343 |         The point size is fixed to 1.
344 |
345 |         Parameters
346 |         ----------
347 |         points : ndarray
348 |             The Nx2 array of image locations, where the first dimension is
349 |             the x-coordinate and the second dimension is the y-coordinate.
350 |         colors : Optional[ndarray]
351 |             The Nx3 array of colors (dtype=np.uint8). If None, the current
352 |             color attribute is used.
353 |         skip_index_check : Optional[bool]
354 |             If True, index range checks are skipped. This is faster, but
355 |             requires all points to lie within the image dimensions.
356 |
357 |         """
358 |         if not skip_index_check:
359 |             cond1, cond2 = points[:, 0] >= 0, points[:, 0] < self.image.shape[1]
360 |             cond3, cond4 = points[:, 1] >= 0, points[:, 1] < self.image.shape[0]
361 |             indices = np.logical_and.reduce((cond1, cond2, cond3, cond4))
362 |             points = points[indices, :]
363 |         if colors is None:
364 |             colors = np.repeat(self._color,
365 |                                len(points)).reshape(3, len(points)).T
366 |         indices = (points + .5).astype(int)
367 |         self.image[indices[:, 1], indices[:, 0], :] = colors
368 |
369 |     def polyline(self, points, alpha=None):
370 |         """Draw a polyline.
371 |
372 |         Parameters
373 |         ----------
374 |         points : ndarray
375 |             The Nx2 array of image locations, where the first dimension is
376 |             the x-coordinate and the second dimension is the y-coordinate.
377 |         alpha : Optional[float]
378 |             Transparency between 0 and 1.
379 |
380 |         Returns
381 |         -------
382 |
383 |         """
384 |         if alpha is None:
385 |             cv2.polylines(
386 |                 self.image, [points], False, self._color, self.thickness)
387 |             return
388 |
389 |         padding = max(0, self.thickness)
390 |         x1, y1 = np.amin(points, axis=0)
391 |         x2, y2 = np.amax(points, axis=0)
392 |
393 |         x = min(self.image.shape[1] - 1 - padding, max(0 + padding, x1))
394 |         y = min(self.image.shape[0] - 1 - padding, max(0 + padding, y1))
395 |         w = min(self.image.shape[1] - 1 - padding, max(0 + padding, x2)) - x
396 |         h = min(self.image.shape[0] - 1 - padding, max(0 + padding, y2)) - y
397 |
398 |         roi = (
399 |             int(x - padding), int(y - padding), int(w + 2 * padding),
400 |             int(h + 2 * padding))
401 |
402 |         if not is_in_bounds(self.image, roi):
403 |             return
404 |
405 |         image_roi = view_roi(self.image, roi)
406 |         image = image_roi.copy()
407 |         points = points - roi[:2]
408 |         cv2.polylines(image, [points], False, self._color, self.thickness)
409 |
410 |         blended = cv2.addWeighted(image, alpha, image_roi, 1. - alpha, 0)
411 |         copy_to(blended, roi, self.image)
412 |
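`polyline` hands the points straight to `cv2.polylines`, so they should arrive as an integer Nx2 array; a typical use is drawing a track's past trajectory with some transparency. A minimal sketch, assuming `trajectory` is a list of (x, y) image positions and `viewer` is an ImageViewer:

    points = np.asarray(trajectory, dtype=np.int32)   # Nx2, columns are x and y
    viewer.polyline(points, alpha=0.5)                # skipped if outside the image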
413 |     def enable_videowriter(
414 |             self, output_filename, fourcc_string="MJPG", fps=None):
415 |         """ Write images to video file.
416 |
417 |         Parameters
418 |         ----------
419 |         output_filename : str
420 |             Output filename.
421 |         fourcc_string : str
422 |             The OpenCV FOURCC code that defines the video codec (check OpenCV
423 |             documentation for more information).
424 |         fps : Optional[float]
425 |             Frames per second. If None, configured according to current
426 |             parameters.
427 |
428 |         """
429 |         fourcc = cv2.VideoWriter_fourcc(*fourcc_string)
430 |         if fps is None:
431 |             fps = int(1000. / self._update_ms)
432 |         self._video_writer = cv2.VideoWriter(
433 |             output_filename, fourcc, fps, self._window_shape)
434 |
435 |     def disable_videowriter(self):
436 |         """ Disable writing videos.
437 |         """
438 |         self._video_writer = None
439 |
440 |     def run(self, update_fun=None, keypress_fun=None):
441 |         """Start the image viewer.
442 |
443 |         This method blocks until the user requests to close the window.
444 |
445 |         Parameters
446 |         ----------
447 |         update_fun : Optional[Callable[] -> None]
448 |             An optional callable that is invoked at each frame. May be used
449 |             to play an animation/a video sequence.
450 |         keypress_fun : Optional[Callable[int] -> None]
451 |             An optional callable that is invoked when the user presses a
452 |             button.
453 |
454 |         """
455 |         if update_fun is not None:
456 |             self._user_fun = update_fun
457 |         if keypress_fun is not None:
458 |             self._keypress_fun = keypress_fun
459 |
460 |         self._terminate, is_paused = False, True
461 |         print("ImageViewer is paused, press space to start.")
462 |         while not self._terminate:
463 |             t0 = time.time()
464 |             if not is_paused:
465 |                 self._user_fun()
466 |                 if self._video_writer is not None:
467 |                     self._video_writer.write(
468 |                         cv2.resize(self.image, self._window_shape))
469 |             t1 = time.time()
470 |             remaining_time = max(1, int(self._update_ms - 1e3 * (t1 - t0)))
471 |             cv2.imshow(
472 |                 self._caption, cv2.resize(self.image, self._window_shape))
473 |             key = cv2.waitKey(remaining_time)
474 |             if key & 255 == 27:  # ESC
475 |                 print("terminating")
476 |                 self._terminate = True
477 |             elif key & 255 == 32:  # ' '
478 |                 print("toggling pause: " + str(not is_paused))
479 |                 is_paused = not is_paused
480 |             elif key & 255 == 115:  # 's'
481 |                 print("stepping")
482 |                 self._user_fun()
483 |                 is_paused = True
484 |             elif key != -1:
485 |                 self._keypress_fun(key)
486 |
487 |         # Due to a bug in OpenCV we must call imshow after destroying the
488 |         # window. This will make the window appear again as soon as waitKey
489 |         # is called.
490 |         #
491 |         # see https://github.com/Itseez/opencv/issues/4535
492 |         self.image[:] = 0
493 |         cv2.destroyWindow(self._caption)
494 |         cv2.waitKey(1)
495 |         cv2.imshow(self._caption, self.image)
496 |
497 |     def stop(self):
498 |         """Stop the control loop.
499 |
500 |         After calling this method, the viewer will stop execution before the
501 |         next frame and hand over control flow to the user.
502 |
503 |         Parameters
504 |         ----------
505 |
506 |         """
507 |         self._terminate = True
508 |
509 |
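`run` is the control loop: it repeatedly calls the supplied update callable, shows (and optionally records) the current image, and handles the ESC, space, and 's' keys. A minimal animation sketch against the class above:

    viewer = ImageViewer(update_ms=40, window_shape=(640, 480))

    def draw_next_frame():
        viewer.image[:] = 0
        viewer.color = (255, 0, 0)
        viewer.circle(320, 240, radius=20)

    viewer.run(draw_next_frame)   # blocks; press space to start, ESC to quit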
510 | class ImageVisualization(pymotutils.Visualization):
511 |     """
512 |     This is an abstract base class for image-based visualization.
513 |     It implements a simple control loop based on :class:`ImageViewer`.
514 |
515 |     Parameters
516 |     ----------
517 |     update_ms : int
518 |         Number of milliseconds between frames (1000 / frames per second).
519 |     window_shape : (int, int)
520 |         Shape of the window (width, height).
521 |     caption : Optional[str]
522 |         Title of the window.
523 |
524 |     Attributes
525 |     ----------
526 |
527 |     """
528 |
529 |     def __init__(self, update_ms, window_shape=(640, 480), caption="Figure 1"):
530 |         self._viewer = ImageViewer(update_ms, window_shape, caption)
531 |         self._frame_idx, self._end_idx = None, None
532 |         self._user_callback = lambda frame_idx: None
533 |
534 |     def enable_videowriter(
535 |             self, video_filename, fourcc_string="FMP4", fps=None):
536 |         """Write output to video.
537 |
538 |         Parameters
539 |         ----------
540 |         video_filename : str
541 |             Output video filename.
542 |         fourcc_string : Optional[str]
543 |             The OpenCV fourcc encoding string (see OpenCV docs).
544 |         fps : Optional[float]
545 |             Frames per second. If None, configured according to current
546 |             visualization parameters.
547 |
548 |         """
549 |         self._viewer.enable_videowriter(video_filename, fourcc_string, fps)
550 |
551 |     def disable_videowriter(self):
552 |         """Disable writing videos.
553 |         """
554 |         self._viewer.disable_videowriter()
555 |
556 |     def run(self, start_idx, end_idx, frame_callback):
557 |         self._frame_idx = start_idx
558 |         self._end_idx = end_idx
559 |         self._user_callback = frame_callback
560 |         self._viewer.run(self._next_frame, self.on_keypress)
561 |
562 |     def _next_frame(self):
563 |         if self._end_idx is not None and self._frame_idx >= self._end_idx:
564 |             self._viewer.stop()
565 |             return
566 |         self._user_callback(self._frame_idx)
567 |         self._frame_idx += 1
568 |
569 |     def on_keypress(self, key):
570 |         """ Callback function for key-press events.
571 |
572 |         Parameters
573 |         ----------
574 |         key : int
575 |             An OpenCV key press code.
576 |
577 |         """
578 |         pass
579 |
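Concrete visualizations subclass ImageVisualization: the application drives them through `run`, which forwards each frame index to the supplied callback, while `on_keypress` can be overridden for custom key bindings. A minimal sketch of such an override (hypothetical class name; depending on the `pymotutils.Visualization` interface, additional drawing hooks may also need to be implemented):

    class KeyLoggingVisualization(ImageVisualization):

        def on_keypress(self, key):
            # React to any key that the viewer itself does not handle.
            print("key pressed:", key)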
--------------------------------------------------------------------------------
/pymotutils/visualization/util.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import colorsys
3 |
4 | import numpy as np
5 | import cv2
6 |
7 |
8 | def create_unique_color_float(tag, hue_step=0.41):
9 |     """Create a unique RGB color code for a given track id (tag).
10 |
11 |     The color code is generated in HSV color space by moving along the
12 |     hue angle and gradually changing the brightness (value channel).
13 |
14 |     Parameters
15 |     ----------
16 |     tag : int
17 |         The unique target identifying tag.
18 |     hue_step : float
19 |         Difference between two neighboring color codes in HSV space (more
20 |         specifically, the distance in hue channel).
21 |
22 |     Returns
23 |     -------
24 |     (float, float, float)
25 |         RGB color code in range [0, 1]
26 |
27 |     """
28 |     h, v = (tag * hue_step) % 1, 1. - (int(tag * hue_step) % 4) / 5.
29 |     r, g, b = colorsys.hsv_to_rgb(h, 1., v)
30 |     return r, g, b
31 |
32 |
33 | def create_unique_color_uchar(tag, hue_step=0.41):
34 |     """Create a unique RGB color code for a given track id (tag).
35 |
36 |     The color code is generated in HSV color space by moving along the
37 |     hue angle and gradually changing the brightness (value channel).
38 |
39 |     Parameters
40 |     ----------
41 |     tag : int
42 |         The unique target identifying tag.
43 |     hue_step : float
44 |         Difference between two neighboring color codes in HSV space (more
45 |         specifically, the distance in hue channel).
46 |
47 |     Returns
48 |     -------
49 |     (int, int, int)
50 |         RGB color code in range [0, 255]
51 |
52 |     """
53 |     r, g, b = create_unique_color_float(tag, hue_step)
54 |     return int(255 * r), int(255 * g), int(255 * b)
55 |
56 |
57 | def apply_heat_map_uchar(values, mini=None, maxi=None):
58 |     """Color values by their intensity.
59 |
60 |     Applies an HSV color map.
61 |
62 |     Parameters
63 |     ----------
64 |     values: ndarray
65 |         The N dimensional array of intensities (ndim=1).
66 |     mini : Optional[float]
67 |         The intensity value of minimum saturation (lower bound of color map).
68 |     maxi : Optional[float]
69 |         The intensity value of maximum saturation (upper bound of color map).
70 |
71 |     Returns
72 |     -------
73 |     ndarray
74 |         The Nx3 shaped array of color codes in range [0, 255]. The dtype is
75 |         int.
76 |
77 |     """
78 |     if len(values) == 0:
79 |         return np.zeros((0, ), dtype=np.uint8)
80 |     mini, maxi = mini or np.min(values), maxi or np.max(values)
81 |     valrange = maxi - mini
82 |     if valrange < np.finfo(float).eps:
83 |         valrange = np.inf
84 |     normalized = (255. * (values - mini) / valrange).astype(np.uint8)
85 |     colors = cv2.applyColorMap(normalized, cv2.COLORMAP_HSV)
86 |     return colors.astype(int).reshape(-1, 3)
87 |
88 |
89 | def apply_heat_map_float(values, mini=None, maxi=None, dtype=float):
90 |     """Color values by their intensity.
91 |
92 |     Applies an HSV color map.
93 |
94 |     Parameters
95 |     ----------
96 |     values: ndarray
97 |         The N dimensional array of intensities (ndim=1).
98 |     mini : Optional[float]
99 |         The intensity value of minimum saturation (lower bound of color map).
100 |     maxi : Optional[float]
101 |         The intensity value of maximum saturation (upper bound of color map).
102 |     dtype: Optional[np.dtype]
103 |         Target numeric type for output array.
104 |
105 |     Returns
106 |     -------
107 |     ndarray
108 |         The Nx3 shaped array of color codes in range [0, 1].
109 |
110 |     """
111 |     if len(values) == 0:
112 |         return np.zeros((0, ), dtype=float)
113 |     heat_map_uchar = apply_heat_map_uchar(values, mini, maxi)
114 |     return heat_map_uchar.astype(dtype) / 255.
115 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | import os
3 |
4 | here = os.path.abspath(os.path.dirname(__file__))
5 | with open(os.path.join(here, "README.md"), encoding="utf-8") as file_handle:
6 |     long_description = file_handle.read()
7 |
8 | setuptools.setup(
9 |     name="pymotutils",
10 |     version="0.1.0",
11 |     description="Multiple object tracking utilities",
12 |     long_description=long_description,
13 |     url="https://github.com/nwojke/pymotutils",
14 |     author="Nicolai Wojke",
15 |     author_email="nwojke@uni-koblenz.de",
16 |     license="GPL3",
17 |     classifiers=[
18 |         "Development Status :: 3 - Alpha",
19 |         "Intended Audience :: Developers",
20 |         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
21 |         "Programming Language :: Python :: 2.7",
22 |         "Programming Language :: Python :: 3.5",
23 |     ],
24 |     keywords="computer_vision, tracking",
25 |     packages=setuptools.find_packages(exclude=["examples"]),
26 |     install_requires=[
27 |         "numpy",
28 |         "opencv-python>=3.0",
29 |         "six",
30 |         "scipy",
31 |         "scikit_learn",
32 |     ],
33 |     python_requires=">=2.7"
34 | )
35 |
--------------------------------------------------------------------------------
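The color helpers in visualization/util.py pair naturally with the ImageViewer drawing routines: each track id gets a stable, well-separated color, and scalar scores can be mapped to a heat map. A short sketch, assuming `viewer` is an ImageViewer and `boxes_by_id` maps track ids to (x, y, w, h) boxes:

    from pymotutils.visualization.util import create_unique_color_uchar

    for track_id, (x, y, w, h) in boxes_by_id.items():
        # Returned as RGB; swap to BGR if the exact hue matters for display.
        viewer.color = create_unique_color_uchar(track_id)
        viewer.rectangle(x, y, w, h, label=str(track_id))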