├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── co3d ├── __init__.py ├── challenge │ ├── README.md │ ├── __init__.py │ ├── blank_predictions_results.py │ ├── co3d_challenge_logo.png │ ├── co3d_submission.py │ ├── data_types.py │ ├── eval_visuals.png │ ├── io.py │ ├── metric_utils.py │ └── utils.py ├── co3d_sha256.json ├── dataset │ ├── __init__.py │ ├── check_checksum.py │ ├── data_types.py │ ├── download_dataset_impl.py │ └── utils.py ├── download_dataset.py ├── links.json └── utils │ ├── dbir_utils.py │ └── evaluate_implicitron_model.py ├── co3d_logo.png ├── examples ├── example_co3d_challenge_submission.py ├── print_co3d_stats.py └── show_co3d_dataset.py ├── grid.gif ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── test_challenge_evaluate.py ├── test_dataset_visualize.py └── test_types.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | **/__pycache__/ -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/CHANGELOG.md -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Common Objects In 3D 2 | We want to make contributing to this project as easy and transparent as possible. 3 | 4 | ## Pull Requests 5 | We actively welcome your pull requests. 6 | 7 | 1. Fork the repo and create your branch from `main`. 8 | 2. If you've added code that should be tested, add tests. 9 | 3. If you've changed APIs, update the documentation. 10 | 4. Ensure the test suite passes. 11 | 5. Make sure your code lints. 12 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 13 | 14 | ## Contributor License Agreement ("CLA") 15 | In order to accept your pull request, we need you to submit a CLA. You only need 16 | to do this once to work on any of Facebook's open source projects. 17 | 18 | Complete your CLA here: 19 | 20 | ## Issues 21 | We use GitHub issues to track public bugs. Please ensure your description is 22 | clear and has sufficient instructions to be able to reproduce the issue. 23 | 24 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 25 | disclosure of security bugs. In those cases, please go through the process 26 | outlined on that page and do not file a public issue. 27 | 28 | ## Coding Style 29 | * all files are processed with the `black` auto-formatter before pushing, e.g. 30 | ``` 31 | python -m black eval_demo.py 32 | ``` 33 | 34 | ## License 35 | By contributing to Common Objects In 3D, you agree that your contributions will be licensed 36 | under the LICENSE file in the root directory of this source tree. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial 4.0 International Public 58 | License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial 4.0 International Public License ("Public 63 | License"). To the extent this Public License may be interpreted as a 64 | contract, You are granted the Licensed Rights in consideration of Your 65 | acceptance of these terms and conditions, and the Licensor grants You 66 | such rights in consideration of benefits the Licensor receives from 67 | making the Licensed Material available under these terms and 68 | conditions. 69 | 70 | Section 1 -- Definitions. 71 | 72 | a. Adapted Material means material subject to Copyright and Similar 73 | Rights that is derived from or based upon the Licensed Material 74 | and in which the Licensed Material is translated, altered, 75 | arranged, transformed, or otherwise modified in a manner requiring 76 | permission under the Copyright and Similar Rights held by the 77 | Licensor. For purposes of this Public License, where the Licensed 78 | Material is a musical work, performance, or sound recording, 79 | Adapted Material is always produced where the Licensed Material is 80 | synched in timed relation with a moving image. 81 | 82 | b. Adapter's License means the license You apply to Your Copyright 83 | and Similar Rights in Your contributions to Adapted Material in 84 | accordance with the terms and conditions of this Public License. 85 | 86 | c. Copyright and Similar Rights means copyright and/or similar rights 87 | closely related to copyright including, without limitation, 88 | performance, broadcast, sound recording, and Sui Generis Database 89 | Rights, without regard to how the rights are labeled or 90 | categorized. For purposes of this Public License, the rights 91 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 92 | Rights. 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. NonCommercial means not primarily intended for or directed towards 116 | commercial advantage or monetary compensation. 
For purposes of 117 | this Public License, the exchange of the Licensed Material for 118 | other material subject to Copyright and Similar Rights by digital 119 | file-sharing or similar means is NonCommercial provided there is 120 | no payment of monetary compensation in connection with the 121 | exchange. 122 | 123 | j. Share means to provide material to the public by any means or 124 | process that requires permission under the Licensed Rights, such 125 | as reproduction, public display, public performance, distribution, 126 | dissemination, communication, or importation, and to make material 127 | available to the public including in ways that members of the 128 | public may access the material from a place and at a time 129 | individually chosen by them. 130 | 131 | k. Sui Generis Database Rights means rights other than copyright 132 | resulting from Directive 96/9/EC of the European Parliament and of 133 | the Council of 11 March 1996 on the legal protection of databases, 134 | as amended and/or succeeded, as well as other essentially 135 | equivalent rights anywhere in the world. 136 | 137 | l. You means the individual or entity exercising the Licensed Rights 138 | under this Public License. Your has a corresponding meaning. 139 | 140 | Section 2 -- Scope. 141 | 142 | a. License grant. 143 | 144 | 1. Subject to the terms and conditions of this Public License, 145 | the Licensor hereby grants You a worldwide, royalty-free, 146 | non-sublicensable, non-exclusive, irrevocable license to 147 | exercise the Licensed Rights in the Licensed Material to: 148 | 149 | a. reproduce and Share the Licensed Material, in whole or 150 | in part, for NonCommercial purposes only; and 151 | 152 | b. produce, reproduce, and Share Adapted Material for 153 | NonCommercial purposes only. 154 | 155 | 2. Exceptions and Limitations. For the avoidance of doubt, where 156 | Exceptions and Limitations apply to Your use, this Public 157 | License does not apply, and You do not need to comply with 158 | its terms and conditions. 159 | 160 | 3. Term. The term of this Public License is specified in Section 161 | 6(a). 162 | 163 | 4. Media and formats; technical modifications allowed. The 164 | Licensor authorizes You to exercise the Licensed Rights in 165 | all media and formats whether now known or hereafter created, 166 | and to make technical modifications necessary to do so. The 167 | Licensor waives and/or agrees not to assert any right or 168 | authority to forbid You from making technical modifications 169 | necessary to exercise the Licensed Rights, including 170 | technical modifications necessary to circumvent Effective 171 | Technological Measures. For purposes of this Public License, 172 | simply making modifications authorized by this Section 2(a) 173 | (4) never produces Adapted Material. 174 | 175 | 5. Downstream recipients. 176 | 177 | a. Offer from the Licensor -- Licensed Material. Every 178 | recipient of the Licensed Material automatically 179 | receives an offer from the Licensor to exercise the 180 | Licensed Rights under the terms and conditions of this 181 | Public License. 182 | 183 | b. No downstream restrictions. You may not offer or impose 184 | any additional or different terms or conditions on, or 185 | apply any Effective Technological Measures to, the 186 | Licensed Material if doing so restricts exercise of the 187 | Licensed Rights by any recipient of the Licensed 188 | Material. 189 | 190 | 6. No endorsement. 
Nothing in this Public License constitutes or 191 | may be construed as permission to assert or imply that You 192 | are, or that Your use of the Licensed Material is, connected 193 | with, or sponsored, endorsed, or granted official status by, 194 | the Licensor or others designated to receive attribution as 195 | provided in Section 3(a)(1)(A)(i). 196 | 197 | b. Other rights. 198 | 199 | 1. Moral rights, such as the right of integrity, are not 200 | licensed under this Public License, nor are publicity, 201 | privacy, and/or other similar personality rights; however, to 202 | the extent possible, the Licensor waives and/or agrees not to 203 | assert any such rights held by the Licensor to the limited 204 | extent necessary to allow You to exercise the Licensed 205 | Rights, but not otherwise. 206 | 207 | 2. Patent and trademark rights are not licensed under this 208 | Public License. 209 | 210 | 3. To the extent possible, the Licensor waives any right to 211 | collect royalties from You for the exercise of the Licensed 212 | Rights, whether directly or through a collecting society 213 | under any voluntary or waivable statutory or compulsory 214 | licensing scheme. In all other cases the Licensor expressly 215 | reserves any right to collect such royalties, including when 216 | the Licensed Material is used other than for NonCommercial 217 | purposes. 218 | 219 | Section 3 -- License Conditions. 220 | 221 | Your exercise of the Licensed Rights is expressly made subject to the 222 | following conditions. 223 | 224 | a. Attribution. 225 | 226 | 1. If You Share the Licensed Material (including in modified 227 | form), You must: 228 | 229 | a. retain the following if it is supplied by the Licensor 230 | with the Licensed Material: 231 | 232 | i. identification of the creator(s) of the Licensed 233 | Material and any others designated to receive 234 | attribution, in any reasonable manner requested by 235 | the Licensor (including by pseudonym if 236 | designated); 237 | 238 | ii. a copyright notice; 239 | 240 | iii. a notice that refers to this Public License; 241 | 242 | iv. a notice that refers to the disclaimer of 243 | warranties; 244 | 245 | v. a URI or hyperlink to the Licensed Material to the 246 | extent reasonably practicable; 247 | 248 | b. indicate if You modified the Licensed Material and 249 | retain an indication of any previous modifications; and 250 | 251 | c. indicate the Licensed Material is licensed under this 252 | Public License, and include the text of, or the URI or 253 | hyperlink to, this Public License. 254 | 255 | 2. You may satisfy the conditions in Section 3(a)(1) in any 256 | reasonable manner based on the medium, means, and context in 257 | which You Share the Licensed Material. For example, it may be 258 | reasonable to satisfy the conditions by providing a URI or 259 | hyperlink to a resource that includes the required 260 | information. 261 | 262 | 3. If requested by the Licensor, You must remove any of the 263 | information required by Section 3(a)(1)(A) to the extent 264 | reasonably practicable. 265 | 266 | 4. If You Share Adapted Material You produce, the Adapter's 267 | License You apply must not prevent recipients of the Adapted 268 | Material from complying with this Public License. 269 | 270 | Section 4 -- Sui Generis Database Rights. 271 | 272 | Where the Licensed Rights include Sui Generis Database Rights that 273 | apply to Your use of the Licensed Material: 274 | 275 | a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right 276 | to extract, reuse, reproduce, and Share all or a substantial 277 | portion of the contents of the database for NonCommercial purposes 278 | only; 279 | 280 | b. if You include all or a substantial portion of the database 281 | contents in a database in which You have Sui Generis Database 282 | Rights, then the database in which You have Sui Generis Database 283 | Rights (but not its individual contents) is Adapted Material; and 284 | 285 | c. You must comply with the conditions in Section 3(a) if You Share 286 | all or a substantial portion of the contents of the database. 287 | 288 | For the avoidance of doubt, this Section 4 supplements and does not 289 | replace Your obligations under this Public License where the Licensed 290 | Rights include other Copyright and Similar Rights. 291 | 292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 293 | 294 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 295 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 296 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 297 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 298 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 299 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 300 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 301 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 302 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 303 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 304 | 305 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 306 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 307 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 308 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 309 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 310 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 311 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 312 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 313 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 314 | 315 | c. The disclaimer of warranties and limitation of liability provided 316 | above shall be interpreted in a manner that, to the extent 317 | possible, most closely approximates an absolute disclaimer and 318 | waiver of all liability. 319 | 320 | Section 6 -- Term and Termination. 321 | 322 | a. This Public License applies for the term of the Copyright and 323 | Similar Rights licensed here. However, if You fail to comply with 324 | this Public License, then Your rights under this Public License 325 | terminate automatically. 326 | 327 | b. Where Your right to use the Licensed Material has terminated under 328 | Section 6(a), it reinstates: 329 | 330 | 1. automatically as of the date the violation is cured, provided 331 | it is cured within 30 days of Your discovery of the 332 | violation; or 333 | 334 | 2. upon express reinstatement by the Licensor. 335 | 336 | For the avoidance of doubt, this Section 6(b) does not affect any 337 | right the Licensor may have to seek remedies for Your violations 338 | of this Public License. 339 | 340 | c. 
For the avoidance of doubt, the Licensor may also offer the 341 | Licensed Material under separate terms or conditions or stop 342 | distributing the Licensed Material at any time; however, doing so 343 | will not terminate this Public License. 344 | 345 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 346 | License. 347 | 348 | Section 7 -- Other Terms and Conditions. 349 | 350 | a. The Licensor shall not be bound by any additional or different 351 | terms or conditions communicated by You unless expressly agreed. 352 | 353 | b. Any arrangements, understandings, or agreements regarding the 354 | Licensed Material not stated herein are separate from and 355 | independent of the terms and conditions of this Public License. 356 | 357 | Section 8 -- Interpretation. 358 | 359 | a. For the avoidance of doubt, this Public License does not, and 360 | shall not be interpreted to, reduce, limit, restrict, or impose 361 | conditions on any use of the Licensed Material that could lawfully 362 | be made without permission under this Public License. 363 | 364 | b. To the extent possible, if any provision of this Public License is 365 | deemed unenforceable, it shall be automatically reformed to the 366 | minimum extent necessary to make it enforceable. If the provision 367 | cannot be reformed, it shall be severed from this Public License 368 | without affecting the enforceability of the remaining terms and 369 | conditions. 370 | 371 | c. No term or condition of this Public License will be waived and no 372 | failure to comply consented to unless expressly agreed to by the 373 | Licensor. 374 | 375 | d. Nothing in this Public License constitutes or may be interpreted 376 | as a limitation upon, or waiver of, any privileges and immunities 377 | that apply to the Licensor or You, including from the legal 378 | processes of any jurisdiction or authority. 379 | 380 | ======================================================================= 381 | 382 | Creative Commons is not a party to its public 383 | licenses. Notwithstanding, Creative Commons may elect to apply one of 384 | its public licenses to material it publishes and in those instances 385 | will be considered the “Licensor.” The text of the Creative Commons 386 | public licenses is dedicated to the public domain under the CC0 Public 387 | Domain Dedication. Except for the limited purpose of indicating that 388 | material is shared under a Creative Commons public license or as 389 | otherwise permitted by the Creative Commons policies published at 390 | creativecommons.org/policies, Creative Commons does not authorize the 391 | use of the trademark "Creative Commons" or any other trademark or logo 392 | of Creative Commons without its prior written consent including, 393 | without limitation, in connection with any unauthorized modifications 394 | to any of its public licenses or any other arrangements, 395 | understandings, or agreements concerning use of licensed material. For 396 | the avoidance of doubt, this paragraph does not form part of the 397 | public licenses. 398 | 399 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 |
6 | 7 | CO3Dv2: Common Objects In 3D (version 2) 8 | ======================================== 9 | 10 | This repository contains a set of tools for working with the 2nd version of the Common Objects in 3D (CO3Dv2) dataset. 11 | 12 | The original dataset has been introduced in our ICCV'21 paper: [Common Objects in 3D: Large-Scale Learning and Evaluation of Real-life 3D Category Reconstruction](https://arxiv.org/abs/2109.00512). For accessing the original data, please switch to the `v1` branch of this repository. 13 | 14 |
15 | 16 |
17 | 18 | 19 | ## New features in CO3Dv2 20 | - [Common Objects in 3D Challenge](https://eval.ai/web/challenges/challenge-page/1819/overview) which allows transparent evaluation on a hidden test server - more details in the [challenge README](./co3d/challenge/README.md) 21 | - 2x larger number of sequences, and 4x larger number of frames 22 | - Improved image quality - less blocky artifacts due to better video decoding 23 | - Improved segmentation masks - stable tracking of the main foreground object without jumping to background objects 24 | - Enabled downloading of a smaller single-sequence subset of ~100 sequences consisting only of the sequences used to evalute the many-view single-sequence task 25 | - Dataset files are hosted in 20 GB chunks facilitating more stable downloads 26 | - A novel, more user-friendly, dataset format 27 | - All images within a sequence are cropped to the same height x width 28 | 29 | 30 | ## Download the dataset 31 | The links to all dataset files are present in this repository in `dataset/links.json`. 32 | 33 | 34 | ### Automatic batch-download 35 | We also provide a python script that allows downloading all dataset files at once. 36 | In order to do so, execute the download script: 37 | 38 | ``` 39 | python ./co3d/download_dataset.py --download_folder DOWNLOAD_FOLDER 40 | ``` 41 | 42 | where `DOWNLOAD_FOLDER` is a local target folder for downloading the dataset files. 43 | Make sure to create this folder before commencing the download. 44 | 45 | Size: All zip files of the dataset occupy 5.5 TB of disk-space. 46 | 47 | 48 | ### Single-sequence dataset subset 49 | We also provide a subset of the dataset consisting only of the sequences selected for the many-view single-sequence task where both training and evaluation are commonly conducted on a single image sequence. In order to download this subset add the 50 | `--single_sequence_subset` option to `download_dataset.py`: 51 | 52 | ``` 53 | python ./co3d/download_dataset.py --download_folder DOWNLOAD_FOLDER --single_sequence_subset 54 | ``` 55 | 56 | Size: The single-sequence subset is much smaller than the full dataset and takes 8.9 GB of disk-space. 57 | 58 | 59 | # Common Objects in 3D Challenge 60 |
61 | 62 |
63 | Together with releasing v2 of the dataset, we also organize the Common Objects in 3D Challenge hosted on EvalAI. 64 | Please visit the [challenge website](https://eval.ai/web/challenges/challenge-page/1819/overview) and the [challenge README](./co3d/challenge/README.md) for more information. 65 | 66 | 67 | # Installation 68 | This is a `Python 3` / `PyTorch` codebase. 69 | 1) [Install `PyTorch`.](https://pytorch.org/) 70 | 2) [Install `PyTorch3D`.](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md#2-install-from-a-local-clone) 71 | - Please note that PyTorch3D has to be built from source to enable the Implicitron module 72 | 3) Install the remaining dependencies in `requirements.txt`: 73 | ``` 74 | pip install visdom tqdm requests h5py 75 | ``` 76 | 4) Install the CO3D package itself: `pip install -e .` 77 | 78 | 79 | ## Dependencies 80 | - [`PyTorch`](https://pytorch.org/) 81 | - [`PyTorch3D`](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md#2-install-from-a-local-clone) (built from source) 82 | - [`tqdm`](https://pypi.org/project/tqdm/) 83 | - [`visdom`](https://github.com/facebookresearch/visdom) 84 | - [`requests`](https://docs.python-requests.org/en/master/) 85 | - [`h5py`](http://www.h5py.org/) 86 | 87 | Note that the core data model in `co3d/dataset/data_types.py` is independent of `PyTorch`/`PyTorch3D` and can be imported and used with other machine-learning frameworks. 88 | 89 | 90 | # Getting started 91 | 1. Install dependencies - See [Installation](#installation) above. 92 | 2. Download the dataset (see [Download the dataset](#download-the-dataset) above) to a given root folder `CO3DV2_DATASET_ROOT`. 93 | 3. Set the environment variable `CO3DV2_DATASET_ROOT` to the dataset root: 94 | ```bash 95 | export CO3DV2_DATASET_ROOT="your_dataset_root_folder" 96 | ``` 97 | 4. Run `example_co3d_challenge_submission.py`: 98 | ``` 99 | cd examples 100 | python example_co3d_challenge_submission.py 101 | ``` 102 | Note that `example_co3d_challenge_submission.py` runs an evaluation of a simple depth-based image rendering (DBIR) model on all challenges and sets of the CO3D Challenge. Feel free to extend the script in order to provide your own submission to the CO3D Challenge. 103 | 104 | 105 | # Running tests 106 | Unit tests can be executed with: 107 | ``` 108 | python -m unittest 109 | ``` 110 | 111 | 112 | # Reproducing results 113 | [Implicitron](https://github.com/facebookresearch/pytorch3d/tree/main/projects/implicitron_trainer) is our open-source framework used to train all implicit shape learning methods from the CO3D paper. 114 | Please visit the following link for more details: 115 | https://github.com/facebookresearch/pytorch3d/tree/main/projects/implicitron_trainer 116 | 117 | 118 | # Dataset format 119 | The dataset is organized in the filesystem as follows: 120 | 121 | ``` 122 | CO3DV2_DATASET_ROOT 123 | ├── <category_0> 124 | │ ├── <sequence_0> 125 | │ │ ├── depth_masks 126 | │ │ ├── depths 127 | │ │ ├── images 128 | │ │ ├── masks 129 | │ │ └── pointcloud.ply 130 | │ ├── <sequence_1> 131 | │ │ ├── depth_masks 132 | │ │ ├── depths 133 | │ │ ├── images 134 | │ │ ├── masks 135 | │ │ └── pointcloud.ply 136 | │ ├── ... 137 | │ ├── <sequence_S> 138 | │ ├── set_lists 139 | │ ├── set_lists_<subset_0>.json 140 | │ ├── set_lists_<subset_1>.json 141 | │ ├── ... 142 | │ ├── set_lists_<subset_M>.json 143 | │ ├── eval_batches 144 | │ │ ├── eval_batches_<subset_0>.json 145 | │ │ ├── eval_batches_<subset_1>.json 146 | │ │ ├── ... 147 | │ │ ├── eval_batches_<subset_M>.json 148 | │ ├── frame_annotations.jgz 149 | │ ├── sequence_annotations.jgz 150 | ├── <category_1> 151 | ├── ...
152 | ├── <category_K> 153 | ``` 154 | 155 | The dataset contains sequences named `<sequence_name>` from `K` categories with 156 | names `<category_0>, ..., <category_K>`. Each category comprises sequence folders `<category>/<sequence_name>` containing the sequence images, depth maps, foreground masks, and valid-depth masks in the `images`, `depths`, `masks`, and `depth_masks` directories respectively. Furthermore, `<category>/set_lists/` stores `M` json files `set_lists_<subset_name>.json`, each describing a certain sequence subset. 157 | 158 | Users specify the loaded dataset subset by setting `self.subset_name` to one of the 159 | available subset names (see below). 160 | 161 | `frame_annotations.jgz` and `sequence_annotations.jgz` are gzipped json files containing the list of all frames and sequences of the given category, stored as lists of `FrameAnnotation` and `SequenceAnnotation` objects respectively. 162 | 163 | 164 | ## Set lists 165 | 166 | Each `set_lists_<subset_name>.json` file contains the following dictionary: 167 | ``` 168 | { 169 | "train": [ 170 | (sequence_name: str, frame_number: int, image_path: str), 171 | ... 172 | ], 173 | "val": [ 174 | (sequence_name: str, frame_number: int, image_path: str), 175 | ... 176 | ], 177 | "test": [ 178 | (sequence_name: str, frame_number: int, image_path: str), 179 | ... 180 | ], 181 | } 182 | ``` 183 | defining the list of frames (identified with their `sequence_name` and `frame_number`) in the "train", "val", and "test" subsets of the dataset. 184 | 185 | Note that `frame_number` can be obtained only from `frame_annotations.jgz` and does not necessarily correspond to the numeric suffix of the corresponding image file name (e.g. a file `<category>/<sequence_name>/images/frame00005.jpg` can have its frame number set to 20, not 5). 186 | 187 | 188 | ### Available subset names in CO3Dv2 189 | 190 | In CO3Dv2, by default, each category contains a _subset_ of the following set lists: 191 | ``` 192 | "set_lists_fewview_test.json" # Few-view task on the "test" sequence set. 193 | "set_lists_fewview_dev.json" # Few-view task on the "dev" sequence set. 194 | "set_lists_manyview_test.json" # Many-view task on the "test" sequence of a category. 195 | "set_lists_manyview_dev_0.json" # Many-view task on the 1st "dev" sequence of a category. 196 | "set_lists_manyview_dev_1.json" # Many-view task on the 2nd "dev" sequence of a category. 197 | ``` 198 | 199 | ## Eval batches 200 | 201 | Each `eval_batches_<subset_name>.json` file contains a list of evaluation examples in the following form: 202 | ``` 203 | [ 204 | [ # batch 1 205 | (sequence_name: str, frame_number: int, image_path: str), 206 | ... 207 | ], 208 | [ # batch 2 209 | (sequence_name: str, frame_number: int, image_path: str), 210 | ... 211 | ], 212 | ] 213 | ``` 214 | Note that the evaluation examples always come from the `"test"` part of the corresponding set list `set_lists_<subset_name>.json`. 215 | 216 | The evaluation task then consists of generating the first image in each batch given the knowledge of the other ones. Hence, the first image in each batch represents the (unseen) target frame, for which only the camera parameters are known, while the rest of the images in the batch are the known source frames whose cameras and colors are given. 217 | 218 | Note that for the Many-view task, where a user is given many known views of a particular sequence and the goal is to generate held-out views from the same sequence, the `eval_batches_manyview_<sequence_set>_<sequence_id>.json` files contain a single (target) frame per evaluation batch. Users can obtain the known views from the corresponding `"train"` list of frames in the set list `set_lists_manyview_<sequence_set>_<sequence_id>.json`. 
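Because set lists and eval batches are plain JSON, they can be inspected without any CO3D-specific tooling. Below is a minimal sketch of loading them with the standard `json` module; it assumes the directory layout shown above, and the `"apple"` / `"fewview_dev"` names are purely illustrative placeholders.

```python
import json
import os

dataset_root = os.environ["CO3DV2_DATASET_ROOT"]
category = "apple"            # example category (placeholder)
subset_name = "fewview_dev"   # example subset name (placeholder)

# Load the "train"/"val"/"test" frame lists of one subset.
set_list_path = os.path.join(
    dataset_root, category, "set_lists", f"set_lists_{subset_name}.json"
)
with open(set_list_path, "r") as f:
    set_lists = json.load(f)
# Each entry is a (sequence_name, frame_number, image_path) triplet.
print("first train frame:", set_lists["train"][0])

# Load the corresponding evaluation batches.
eval_batches_path = os.path.join(
    dataset_root, category, "eval_batches", f"eval_batches_{subset_name}.json"
)
with open(eval_batches_path, "r") as f:
    eval_batches = json.load(f)
# The first element of every batch is the target frame to be predicted.
print(len(eval_batches), "evaluation batches; first target frame:", eval_batches[0][0])
```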
219 | 220 | 221 | # PyTorch-independent usage 222 | The core data model in `co3d/dataset/data_types.py` is independent of `PyTorch`/`PyTorch3D` and can be imported and used with other machine-learning frameworks. 223 | 224 | For example, in order to load the per-category frame and sequence annotations users can execute the following code: 225 | ```python 226 | from typing import List 227 | from co3d.dataset.data_types import ( 228 | load_dataclass_jgzip, FrameAnnotation, SequenceAnnotation 229 | ) 230 | category_frame_annotations = load_dataclass_jgzip( 231 | f"{CO3DV2_DATASET_ROOT}/{category_name}/frame_annotations.jgz", List[FrameAnnotation] 232 | ) 233 | category_sequence_annotations = load_dataclass_jgzip( 234 | f"{CO3DV2_DATASET_ROOT}/{category_name}/sequence_annotations.jgz", List[SequenceAnnotation] 235 | ) 236 | ``` 237 | 238 | Furthermore, all challenge-related code under `co3d/challenge` also does not depend on `PyTorch`. 239 | 240 | 241 | # Reference 242 | If you use our dataset, please use the following citation: 243 | ``` 244 | @inproceedings{reizenstein21co3d, 245 | Author = {Reizenstein, Jeremy and Shapovalov, Roman and Henzler, Philipp and Sbordone, Luca and Labatut, Patrick and Novotny, David}, 246 | Booktitle = {International Conference on Computer Vision}, 247 | Title = {Common Objects in 3D: Large-Scale Learning and Evaluation of Real-life 3D Category Reconstruction}, 248 | Year = {2021}, 249 | } 250 | ``` 251 | 252 | 253 | # License 254 | The CO3D codebase is released under the [CC BY-NC 4.0](LICENSE). 255 | 256 | 257 | # Overview video 258 | The following presentation of the dataset was delivered at the Extreme Vision Workshop at CVPR 2021: 259 | [![Overview](https://img.youtube.com/vi/hMx9nzG50xQ/0.jpg)](https://www.youtube.com/watch?v=hMx9nzG50xQ) 260 | -------------------------------------------------------------------------------- /co3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/co3d/__init__.py -------------------------------------------------------------------------------- /co3d/challenge/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | 6 | Common Objects in 3D Challenge 7 | ============================== 8 | 9 | The following describes the Common Objects in 3D Challenge (CO3DC). 10 | 11 | # Quick start 12 | See example code for creating a CO3DC submission in: 13 | ``` 14 | /examples/example_co3d_challenge_submission.py 15 | ``` 16 | Note that the codebase also requires the latest PyTorch3D installation. 17 | 18 | After running the evaluation code, please send the produced .hdf5 file to the 19 | EvalAI evaluation server: 20 | 21 | [===> CO3D challenge submission page <===](https://eval.ai/web/challenges/challenge-page/1819/submission) 22 | 23 | Note that submission using the `evalai` CLI interface is strongly encouraged. 24 | 25 | 26 | # CO3D challenge overview 27 | 28 | CO3D challenge evaluates New-view Synthesis methods. 29 | 30 | More specifically, given a set of known "source" views of an object, the goal 31 | is to generate new, previously unobserved, "target" views of the scene. 32 | 33 | The challenge has 2 tracks - _Many-view_, and _Few-view_. 34 | 35 | ### _Many-view_ task 36 | 37 | This is the standard scenario popularized by e.g. NeRF. Given many 38 | (~100) known source views of a scene, the goal is to generate target views 39 | that are relative close to the source ones. 40 | 41 | ### _Few-view_ task 42 | 43 | Here, the goal is the same as in Many-view, with the difference that only 44 | a very small number of source views is known (2-10). Methods are likely to succced 45 | only if they exploit category-centric geometry/appearance prior that can be learned 46 | from the category-centric training data. 47 | 48 | ### CO3Dv2 Dataset 49 | 50 | The CO3Dv2 dataset provides all training and testing data needed for a submission. 51 | Please follow the [main README](https://github.com/facebookresearch/co3d/tree/main#readme) file to downlaod and install the dataset. 52 | 53 | ### Evaluation data 54 | 55 | Each evaluation example contains several source views and a single 56 | target view. For each source view, the corresponding color image, 57 | foreground segmentation mask, and camera parameters are given. 58 | Given this information, the goal is to generate the target view, for which only the 59 | camera parameters are given. 60 | 61 | ### Evaluation metrics 62 | 63 |
PSNR_masked - The main evaluation metric
64 |

 65 | The submissions are primarily evaluated in terms of color prediction accuracy, 66 | measured by the Peak Signal-to-Noise Ratio (PSNR) between the predicted 67 | image and the ground truth image masked with the foreground mask (PSNR_masked). 

69 | 70 |

 71 | PSNR_masked - 72 | Since we are interested only in reconstructing the objects in the 73 | foreground, the ground truth target view for PSNR_masked consists of the original 74 | ground truth image masked using a foreground mask to produce an image containing 75 | the object of interest on a black background. 76 | Note that this requires each submission to generate renders that contain only the 77 | foreground object, with the background regions colored black. 

79 | 80 |
Additional image metrics - PSNR_fg and PSNR_full
81 |

 82 | Together with PSNR_masked, we also calculate PSNR_full and PSNR_fg. 83 | PSNR_fg restricts the evaluation only to the foreground pixels as defined by the 84 | foreground segmentation mask. 85 | PSNR_full evaluates PSNR between the original unmasked image and the predicted one. 
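For illustration, the three PSNR variants can be sketched with `numpy` as follows. This is only an informal re-implementation to clarify the definitions - the metric code shipped with this repository (`co3d/challenge/metric_utils.py`) remains the authoritative reference - and it assumes float images with values in [0, 1] and a boolean foreground mask.

```python
import numpy as np


def psnr(pred: np.ndarray, gt: np.ndarray, max_val: float = 1.0) -> float:
    """Peak Signal-to-Noise Ratio between two arrays with values in [0, max_val]."""
    mse = np.mean((pred.astype(np.float64) - gt.astype(np.float64)) ** 2)
    return float(10.0 * np.log10((max_val ** 2) / mse))


def psnr_variants(pred: np.ndarray, gt: np.ndarray, fg_mask: np.ndarray) -> dict:
    """pred, gt: (H, W, 3) images in [0, 1]; fg_mask: (H, W) boolean foreground mask."""
    m = fg_mask[..., None]  # (H, W, 1), broadcasts over the color channels
    return {
        # full unmasked image
        "psnr_full": psnr(pred, gt),
        # object on a black background (submitted renders are expected
        # to already have the background blacked out)
        "psnr_masked": psnr(pred * m, gt * m),
        # foreground pixels only
        "psnr_fg": psnr(pred[fg_mask], gt[fg_mask]),
    }
```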

87 | 88 |
Mask metrics - IoU
89 |

 90 | Each submission is also required to generate a binary mask denoting the 91 | regions of the image that are occupied by the object. The IoU evaluation metric 92 | then computes the Intersection-over-Union between the predicted foreground 93 | region and the ground truth foreground mask. 
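Again purely for illustration (assuming two binary masks of the same spatial size), the IoU metric amounts to:

```python
import numpy as np


def iou(pred_mask: np.ndarray, gt_mask: np.ndarray, eps: float = 1e-8) -> float:
    """Intersection-over-Union between two (H, W) binary foreground masks."""
    pred_mask = pred_mask.astype(bool)
    gt_mask = gt_mask.astype(bool)
    intersection = np.logical_and(pred_mask, gt_mask).sum()
    union = np.logical_or(pred_mask, gt_mask).sum()
    return float(intersection / (union + eps))
```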

95 | 96 |
Depth metrics - depth_abs_fg
 97 | 
 98 | Finally, each submission should also produce a depth render, which comprises 99 | the z-component of each pixel's point location in the camera coordinates. 100 | The rendered depth is compared to the ground truth one using the absolute depth error 101 | metric. Note that the depth error is computed only for the foreground pixels as defined 102 | by the ground truth segmentation mask. 103 | 104 | 105 | 106 | 107 | 108 | 
 109 | 110 | Note that if an evaluated method is not able to produce depth maps or segmentation 111 | masks, users are free to generate placeholder depth and mask predictions. Of course, 112 | this will lead to a low performance in depth_abs_fg or IoU. 113 | 114 | 115 | 
 116 | The following image illustrates the PSNR and IoU metrics: 117 | 118 | 
 119 | 120 | 121 | 122 | 123 | # CO3D challenge software framework 124 | The `co3d` repository contains tooling that allows simple generation and submission 125 | of challenge entries. 126 | 127 | 128 | ## Submission guide 129 | 1) Install the `co3d` package: 130 | ``` 131 | git clone https://github.com/facebookresearch/co3d 132 | cd co3d 133 | pip install -e . 134 | ``` 135 | 136 | 2) Start by importing the `CO3DSubmission` class and instantiating a submission run. 137 | For example, the following code: 138 | ```python 139 | from co3d.challenge.co3d_submission import CO3DSubmission 140 | output_folder = "./co3d_submission_files" 141 | task = CO3DTask.MANY_VIEW 142 | sequence_set = CO3DSequenceSet.TEST 143 | 144 | submission = CO3DSubmission( 145 | task=task, 146 | sequence_set=sequence_set, 147 | output_folder=output_folder, 148 | dataset_root=dataset_root, 149 | ) 150 | ``` 151 | will instantiate a CO3D submission object `submission` that stores (and optionally 152 | evaluates) results of the `manyview` task on the `test` set. All results will be 153 | stored in the `output_folder`. Note that a user also has to specify the local root 154 | folder of the CO3D dataset in `dataset_root`. 155 | 156 | 3) Obtain the dictionary of evaluation examples `eval_batches_map` from `submission`: 157 | ```python 158 | eval_batches_map = submission.get_eval_batches_map() 159 | ``` 160 | Here, `eval_batches_map` is a dictionary of the following form: 161 | ``` 162 | {(category: str, subset_name: str): eval_batches} # eval_batches_map 163 | ``` 164 | where each `eval_batches` value looks as follows: 165 | ```python 166 | [ 167 | [ 168 | (sequence_name_0: str, frame_number_0: int), 169 | (sequence_name_0: str, frame_number_1: int), 170 | ... 171 | (sequence_name_0: str, frame_number_M_0: int), 172 | ], 173 | ... 174 | [ 175 | (sequence_name_N: str, frame_number_0: int), 176 | (sequence_name_N: str, frame_number_1: int), 177 | ... 178 | (sequence_name_N: str, frame_number_M_N: int), 179 | ] 180 | ] # eval_batches 181 | ``` 182 | i.e. a list of `N` evaluation examples, each consisting of a tuple of 183 | `M_i` frames with numbers `frame_number_j` from a given sequence name `sequence_name_i`. 184 | Note that the mapping from `frame_number` and `sequence_name` to the CO3D data 185 | is stored in the respective `frame_annotations.jgz` and `sequence_annotations.jgz` 186 | files in the corresponding category folder. 187 | 188 | For the Many-view task (`CO3DTask.MANY_VIEW`), each evaluation batch has a single 189 | (`M_i=1`) frame, which is the target evaluation frame. 190 | 191 | For the Few-view task (`CO3DTask.FEW_VIEW`), each batch has several frames (`M_i>1`), 192 | where the first frame is the target frame which should be predicted given the knowledge 193 | of the source frames, which correspond to the 2nd through last elements of each batch. 194 | 195 | 196 | 4) Next we iterate over eval_batches, predict new views, and store our predictions 197 | with the `submission` object. 
198 | ```python 199 | # iterate over evaluation subsets and categories 200 | for (category, subset_name), eval_batches in eval_batches_map.items(): 201 | 202 | # iterate over all evaluation examples of a given category and subset 203 | for eval_batch in eval_batches: 204 | # parse the evaluation sequence name and target frame number from eval_batch 205 | sequence_name, frame_number = eval_batch[0][:2] 206 | 207 | # `predict_new_view` is a user-defined function which generates 208 | # the test view (corresponding to the first element of the eval batch) 209 | image, depth, mask = predict_new_view(eval_batch, ...) 210 | 211 | # add the render to the submission 212 | submission.add_result( 213 | category=category, 214 | subset_name=subset_name, 215 | sequence_name=sequence_name, 216 | frame_number=frame_number, 217 | image=image, 218 | mask=mask, 219 | depth=depth, 220 | ) 221 | ``` 222 | 223 | 5) Finally, we export the submission object to an hdf5 file that can be uploaded to the 224 | EvalAI server: 225 | ``` 226 | submission.export_results() 227 | ``` 228 | 229 | 6) Submit the resulting hdf5 file to the EvalAI submission server: 230 | 231 | There are two options for submission, with the first option strongly preferred: 232 | 233 | 1. (Preferred, please use this option) Submit the file using the `evalai` command line interface: 234 | 1. Obtain your personal token from your EvalAI profile page: 235 | 236 | Go to https://eval.ai/web/profile, then click on the 'Get your Auth Token' button. 237 | Click on "Show Token" and copy-paste the revealed token. 238 | 2. Set the environment variable EVAL_AI_PERSONAL_TOKEN to your personal token: 239 | ```bash 240 | export EVAL_AI_PERSONAL_TOKEN= 241 | ``` 242 | 3. Install the EvalAI command line interface: 243 | ```bash 244 | pip install evalai 245 | ``` 246 | 4. Submit the results to the EvalAI evaluation server by calling the 247 | `submit_to_eval_ai()` method of your CO3D `submission` object: 248 | ```python 249 | submission.submit_to_eval_ai() 250 | ``` 251 | 252 | 2. (Avoid this option if possible; use it only if option 1 fails.) Submit the file using the EvalAI submission page. 253 | 1. Visit the [===> CO3D challenge submission page <===](https://eval.ai/web/challenges/challenge-page/1819/submission) 254 | 2. Submit the exported HDF5 file using the web interface. -------------------------------------------------------------------------------- /co3d/challenge/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /co3d/challenge/blank_predictions_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | 8 | from io import StringIO 9 | import os 10 | import csv 11 | from typing import List, Any 12 | 13 | from .data_types import CO3DTask, CO3DSequenceSet 14 | 15 | 16 | BLANK_PREDICTION_RESULTS = {} 17 | 18 | 19 | def _read_result_csv(s: str): 20 | # with open(fl, "r") as f: 21 | f = StringIO(s) 22 | csvreader = csv.reader(f) 23 | rows = [row for row in csvreader] 24 | rows = rows[1:] 25 | header = rows[0] 26 | data = rows[1:-1] 27 | def _getcol(col_name: str, row: List[Any]) -> Any: 28 | c = row[header.index(col_name)] 29 | try: 30 | return float(c) 31 | except: 32 | return c 33 | parsed = { 34 | (_getcol("Category", r), _getcol("Subset name", r)): { 35 | k: _getcol(k, r) for k in header 36 | } for r in data 37 | } 38 | return parsed 39 | 40 | 41 | CSVs = { 42 | "fewview_dev": """ 43 | Category,Subset name,psnr_masked,psnr_fg,psnr_full_image,depth_abs_fg,iou 44 | apple,fewview_dev,18.40938866633708,6.884780900276403,5.732459292886711,0.48950375965076004,0.0 45 | backpack,fewview_dev,18.375179837644755,11.884768822089297,5.492699127831022,0.580444590643848,0.0 46 | ball,fewview_dev,15.65596825167019,5.697924649467918,5.391241119316918,0.43991856992712286,0.0 47 | banana,fewview_dev,18.461971791362227,6.118058719441003,5.8697287026999625,0.5312080518960041,0.0 48 | baseballbat,fewview_dev,20.451565072548348,6.7702838462526325,6.133595679990426,0.787964382936369,0.0 49 | baseballglove,fewview_dev,15.899123723379235,8.491206359449485,5.952075366998026,0.5044438522210485,0.0 50 | bench,fewview_dev,13.835660454286623,6.1021708060060185,5.338972434739994,0.8728473659927769,0.0 51 | bicycle,fewview_dev,14.85079899106894,7.178515383648441,5.4468849723020165,0.7596495667817377,0.0 52 | book,fewview_dev,13.526778301589218,5.929520397898452,6.1038428839075625,0.7119168557685552,0.0 53 | bottle,fewview_dev,17.756936987543572,7.695879675777415,5.792669536453962,1.1126274259151023,0.0 54 | bowl,fewview_dev,12.117324340446702,3.522034136500667,6.132690804727037,0.548212652825193,0.0 55 | broccoli,fewview_dev,17.60270342882336,8.135587140185267,5.636059385848195,0.48109570750702163,0.0 56 | cake,fewview_dev,14.831394456777907,6.641730746137352,5.778288244687103,0.4713467452914664,0.0 57 | car,fewview_dev,12.199833440326447,6.2695458065545955,5.7147062915561,0.6731242096715442,0.0 58 | carrot,fewview_dev,18.42032666772822,6.336027619876071,5.2655157144357,0.7425826445279987,0.0 59 | cellphone,fewview_dev,18.54815997270957,9.132949039155196,5.920507132031587,0.7256476083461838,0.0 60 | chair,fewview_dev,14.254104990224922,6.8885175096457525,5.42230365019509,0.8701949198272996,0.0 61 | couch,fewview_dev,12.096141908081652,8.498063614467037,6.839693292778098,0.6672055849897333,0.0 62 | cup,fewview_dev,16.30300593190912,6.263725950094426,5.419278138684526,1.109737605178693,0.0 63 | donut,fewview_dev,17.760249549810045,7.19401090262162,5.406775287613137,0.5831024075924244,0.0 64 | frisbee,fewview_dev,13.006974807290442,5.348851057119092,6.081314892526941,0.6282357528069842,0.0 65 | hairdryer,fewview_dev,18.307693246477385,7.653327373043194,5.796698293526376,0.5692578716769887,0.0 66 | handbag,fewview_dev,16.863888776603684,9.668777191048893,5.885582988575421,0.6140305534695657,0.0 67 | hotdog,fewview_dev,16.576000201780598,6.7813353163227275,6.479828364566311,0.5515738226619902,0.0 68 | hydrant,fewview_dev,14.35863704229326,5.557106534568748,5.486735221682155,0.7370800150837736,0.0 69 | keyboard,fewview_dev,18.319239151881423,10.9398173290579,5.471888028766401,0.591969625411462,0.0 70 | 
kite,fewview_dev,13.759580600059902,6.095096560743659,5.5797533716568335,0.3686704352187232,0.0 71 | laptop,fewview_dev,17.958107529829775,10.58932076091378,5.9870485037655365,0.6760399403943799,0.0 72 | microwave,fewview_dev,12.641232654595555,7.5579894876019935,5.7736075695959785,0.7816656712123962,0.0 73 | motorcycle,fewview_dev,13.902730964332383,7.837737363341203,5.6993349939287,0.8026270041676278,0.0 74 | mouse,fewview_dev,22.139654039699753,11.380540045528843,5.26534717648027,0.6258851366555073,0.0 75 | orange,fewview_dev,16.965398815565717,5.392140191707388,5.868309801114943,0.45518186645635506,0.0 76 | parkingmeter,fewview_dev,17.969579417828633,8.303453741571293,5.550653705252322,2.7703986799279625,0.0 77 | pizza,fewview_dev,14.044388259713267,6.467125499434811,6.349638051827558,0.5445261030741094,0.0 78 | plant,fewview_dev,15.912698636112678,8.209728015160032,5.41847542705161,0.9729385734872266,0.0 79 | remote,fewview_dev,18.901389746835065,8.809855001539868,5.6508358729724995,0.5809070430213752,0.0 80 | sandwich,fewview_dev,14.961081916655587,5.359419050654777,6.486182655727676,0.5273259918746086,0.0 81 | skateboard,fewview_dev,15.12940600031295,6.633805444460857,6.075841409914119,0.5708218125938797,0.0 82 | stopsign,fewview_dev,18.52676122564753,6.61671306856769,5.412139613407474,6.290707304470178,0.0 83 | suitcase,fewview_dev,16.493029339685542,10.757954804495968,6.232275999259873,0.5967537541074001,0.0 84 | teddybear,fewview_dev,12.49373038673622,5.562061567728542,5.8834174182726855,0.6012993745910462,0.0 85 | toaster,fewview_dev,15.590308176317933,8.571510283192422,5.8223530170835565,0.7087675899756055,0.0 86 | toilet,fewview_dev,11.053325723237059,3.745954412389449,5.831752233322646,0.7324808735388084,0.0 87 | toybus,fewview_dev,15.74397288343334,5.87386919966778,5.694742423634763,0.644572040998336,0.0 88 | toyplane,fewview_dev,15.271423476084475,4.920347774565625,5.603913746917713,0.5686183372913356,0.0 89 | toytrain,fewview_dev,19.250492955217194,8.365187557837626,5.5957012947860445,0.6429103676877059,0.0 90 | toytruck,fewview_dev,15.813126824200825,7.367196186168707,5.59287438907558,0.5748745851615271,0.0 91 | tv,fewview_dev,18.455985344741848,11.821412211957313,5.87636504861574,0.6193668766022515,0.0 92 | umbrella,fewview_dev,13.388214509185625,6.669691460242465,5.398996667950242,0.5547154568934756,0.0 93 | vase,fewview_dev,17.385895374160103,7.695607020715037,5.667400967410725,1.0544596567185702,0.0 94 | wineglass,fewview_dev,14.92593215613611,5.489494483032894,5.883318241506832,2.09036588666451,0.0 95 | MEAN,-,16.028754842096472,7.3270142749005025,5.768476753918801,0.8374863237526772,0.0 96 | """, 97 | "fewview_test": """ 98 | Category,Subset name,psnr_masked,psnr_fg,psnr_full_image,depth_abs_fg,iou 99 | apple,fewview_test,18.51983235506069,6.710896207691665,5.622396257710374,0.45868530307683764,0.0 100 | backpack,fewview_test,15.329616295156082,9.704246779430184,6.021398266902823,0.5274631579925675,0.0 101 | ball,fewview_test,16.999140797902346,6.393148333684946,6.167099298585788,0.42074640466733093,0.0 102 | banana,fewview_test,17.20449002482513,6.2347690237546765,5.337301584435467,0.5906480660508107,0.0 103 | baseballbat,fewview_test,20.598735999896142,6.724621984421882,5.929346230877072,0.46383516633969724,0.0 104 | baseballglove,fewview_test,16.250018316676424,8.485414452103313,5.35050821728197,0.5755057054113818,0.0 105 | bench,fewview_test,13.380691505741307,6.217615311139159,5.389882231932645,0.8591881917970259,0.0 106 | 
bicycle,fewview_test,15.804150486121728,8.539006404409536,7.293404052140095,0.7740858337090635,0.0 107 | book,fewview_test,14.350489743207989,5.356299926470255,5.138131270946916,0.6249600811612394,0.0 108 | bottle,fewview_test,17.257503711230473,7.332068784914889,5.825424785199224,1.0062512850600411,0.0 109 | bowl,fewview_test,12.7586871865527,5.952472495887487,7.350451995400975,0.7734948803009338,0.0 110 | broccoli,fewview_test,17.69069033947863,8.250871950138103,5.718669980890903,0.5437043438960382,0.0 111 | cake,fewview_test,14.809462963950144,6.142164342026519,6.145654847812541,0.45489466623242036,0.0 112 | car,fewview_test,11.914391205648087,6.5335541836879925,5.90360267479956,0.9021454444786102,0.0 113 | carrot,fewview_test,20.060924545297425,6.219697054467009,5.261149123525815,0.7081597814658059,0.0 114 | cellphone,fewview_test,21.520117285013956,10.847631110964242,5.41747877060995,1.0517241006106035,0.0 115 | chair,fewview_test,14.691657730804202,8.959579180137167,6.878377818012938,0.8045192519054911,0.0 116 | couch,fewview_test,11.545670382508696,8.419983656626247,6.902446179473004,0.6761085327114593,0.0 117 | cup,fewview_test,17.79448614165711,6.495705819546957,5.5050360165654855,0.8834131631626546,0.0 118 | donut,fewview_test,18.596152225400257,6.892531195772306,6.240000810567556,0.5443665622620474,0.0 119 | frisbee,fewview_test,14.370690470903668,6.048295011020775,6.136056575421687,0.4830201400666513,0.0 120 | hairdryer,fewview_test,18.47390481689051,7.494774772300304,5.743646634555602,0.5239972887128962,0.0 121 | handbag,fewview_test,13.87987101022776,8.280409779606966,6.572322491579377,0.6866448922525301,0.0 122 | hotdog,fewview_test,18.436410464732152,7.713564800659037,5.859372904290447,0.5873852722036716,0.0 123 | hydrant,fewview_test,14.768617799865435,5.67036284794227,5.71565321761019,0.9328092564314482,0.0 124 | keyboard,fewview_test,18.875163364703024,10.97846088231997,5.392007807994692,0.42114457863505195,0.0 125 | kite,fewview_test,12.882975207164943,6.079375329369365,5.243720977367847,0.571440938913041,0.0 126 | laptop,fewview_test,16.68965246676936,9.765618650745138,6.127183977142236,0.8968296529628422,0.0 127 | microwave,fewview_test,13.859058432153368,8.649172226048128,6.809269971869398,0.8740670698190732,0.0 128 | motorcycle,fewview_test,12.922201328542098,7.659321482648036,5.3469570020173816,0.7923491167407205,0.0 129 | mouse,fewview_test,25.03083236821661,10.870194079196883,5.61381320415904,0.5803283306516662,0.0 130 | orange,fewview_test,17.906264108511905,5.863058031859002,5.902648030774557,0.4927651700044394,0.0 131 | parkingmeter,fewview_test,24.486359595107576,10.777998512312754,4.875545759481984,3.9189161735406275,0.0 132 | pizza,fewview_test,15.25053153218815,6.195657831341678,5.888809317232928,0.5366542850357786,0.0 133 | plant,fewview_test,14.533347345876026,8.213483475587314,5.9657101837783895,0.8745105580745663,0.0 134 | remote,fewview_test,18.685696193857062,9.167126712684974,5.283444994288521,0.5784209284648094,0.0 135 | sandwich,fewview_test,14.954638830523134,5.489779040424508,6.203690658497073,0.582476274688696,0.0 136 | skateboard,fewview_test,18.921604245076754,8.111335322871586,4.540996792864179,0.8144729054641098,0.0 137 | stopsign,fewview_test,20.83021952727707,7.7066182145576425,5.596606825038416,6.195708155269956,0.0 138 | suitcase,fewview_test,14.568523293458965,8.872585021337093,5.526936386940414,0.5437482494754128,0.0 139 | teddybear,fewview_test,13.184137897313038,5.667378086474551,5.638538121962938,0.6289599526865502,0.0 140 | 
toaster,fewview_test,15.398766247640951,8.138341096517484,6.073562974743127,0.7335666912630792,0.0 141 | toilet,fewview_test,10.138714105703048,3.8756171226863025,5.85450160774978,0.7892172212095283,0.0 142 | toybus,fewview_test,15.925097991923954,6.517829456639026,5.691133527297476,0.6022958688384993,0.0 143 | toyplane,fewview_test,16.703705769834098,5.323541429433026,5.46165954412417,0.5639341931778066,0.0 144 | toytrain,fewview_test,17.859279914562713,7.8933999002371715,5.604032948369101,0.6932112812874591,0.0 145 | toytruck,fewview_test,16.971557700694344,7.745719186191729,5.794916102483104,0.564653671235697,0.0 146 | tv,fewview_test,18.037750946556894,13.741247943038163,8.747561838523023,0.5162819237405952,0.0 147 | umbrella,fewview_test,13.092407842058238,6.756963662911218,5.447907114523638,0.534506784839016,0.0 148 | vase,fewview_test,18.54297573271471,8.090029952142554,5.668374190385807,0.84122947818443,0.0 149 | wineglass,fewview_test,16.386668940524114,5.5524702294978345,5.735686759902533,1.4353355366647544,0.0 150 | MEAN,-,16.463618328111792,7.555333495840728,5.871765271698825,0.8516623875064206,0.0 151 | """, 152 | "manyview_dev": """ 153 | Category,Subset name,psnr_masked,psnr_fg,psnr_full_image,depth_abs_fg,iou 154 | apple,manyview_dev_0,18.264030492114536,8.350223131127144,4.366539721003419,0.4195637484678012,0.0 155 | apple,manyview_dev_1,14.137138507072345,6.6045994842301345,6.240087240624211,0.43567804409070654,0.0 156 | ball,manyview_dev_0,14.673712693605873,6.091306495279248,5.217217027846326,0.35927968102112323,0.0 157 | ball,manyview_dev_1,11.090845071075146,4.64095367064294,2.463653189968876,0.30228020972164427,0.0 158 | bench,manyview_dev_0,13.333540945296608,4.137188797564715,3.844656341335867,0.8008696769825814,0.0 159 | bench,manyview_dev_1,11.474174975542255,3.892151505117967,4.14563643434561,0.8577265682977291,0.0 160 | book,manyview_dev_0,13.964168705937992,5.302433873449493,5.950633752149304,0.668803861808978,0.0 161 | book,manyview_dev_1,12.398406799192342,4.119572830245314,6.039375672561894,0.8608240982086351,0.0 162 | bowl,manyview_dev_0,16.958798002755774,4.9461020198227335,5.578702964374623,0.6690737351712432,0.0 163 | bowl,manyview_dev_1,12.420483353954074,5.756645234213993,6.069489156010504,0.5819949787763078,0.0 164 | broccoli,manyview_dev_0,19.630737300870244,9.406282525085935,6.402535226376115,0.7907156923061898,0.0 165 | broccoli,manyview_dev_1,18.781287064441447,8.09672300742875,4.67134680549106,0.4626196557341922,0.0 166 | cake,manyview_dev_0,14.799043006158593,5.867235047104056,5.7329760554862945,0.5205964759006821,0.0 167 | cake,manyview_dev_1,17.84162321617,9.41822453353167,3.7158681607815254,0.3612821873000541,0.0 168 | donut,manyview_dev_0,19.315033141413654,9.455566547834058,3.910254156226572,0.5413953368124613,0.0 169 | donut,manyview_dev_1,22.26734997183049,10.174649831308487,4.199195894665875,0.5521516658527057,0.0 170 | hydrant,manyview_dev_0,14.599159376924849,5.655154414726878,5.289620369144585,0.9737327772204973,0.0 171 | hydrant,manyview_dev_1,14.544431000855953,5.876377992594626,4.506377178812374,1.0210153410111495,0.0 172 | mouse,manyview_dev_0,22.553107676356586,12.793445604091437,5.927286492328659,0.5816200334131308,0.0 173 | mouse,manyview_dev_1,17.89414321396086,8.956320087603723,7.097351162295129,0.5222896946353802,0.0 174 | orange,manyview_dev_0,13.732343455171254,5.052956697685929,5.679024711561304,0.40213060027513875,0.0 175 | 
orange,manyview_dev_1,14.71190574360874,4.956667990371484,5.836996460679712,0.43328379232231895,0.0 176 | plant,manyview_dev_0,17.56722473025224,10.851111767732277,6.940102616941581,0.9601928359930311,0.0 177 | plant,manyview_dev_1,18.62091024389777,11.114146143571679,8.919832772445316,0.845715675126882,0.0 178 | remote,manyview_dev_0,12.004470911615606,2.3372367853347664,5.928692360063941,0.6355222400483482,0.0 179 | remote,manyview_dev_1,13.035720177392095,4.368321832863184,3.7645273565115303,0.6257342864206513,0.0 180 | skateboard,manyview_dev_0,14.087374862144243,6.183930758291541,7.7026533167035085,0.7381270587952287,0.0 181 | skateboard,manyview_dev_1,15.24606555170737,6.935641480347134,6.728247832458047,0.6846367731825937,0.0 182 | suitcase,manyview_dev_0,13.819257223346327,5.727869083939035,5.9663188950446795,0.42728104332046707,0.0 183 | suitcase,manyview_dev_1,23.33527836247522,12.70130752964975,5.440617175698944,0.7376517524662343,0.0 184 | teddybear,manyview_dev_0,15.310590723595963,7.5183318102880765,5.187722505560557,0.6132311702409632,0.0 185 | teddybear,manyview_dev_1,19.00287693135702,11.380410989980264,5.372428296399181,0.655451568067443,0.0 186 | toaster,manyview_dev_0,16.09490094737935,7.357336873218335,5.733018822009381,0.6335824697011363,0.0 187 | toaster,manyview_dev_1,13.391233953784758,6.32606222531527,6.035255066975607,0.7543408733149064,0.0 188 | toytrain,manyview_dev_0,14.60365232137707,8.252354438191217,7.28055045581793,0.5177963318470418,0.0 189 | toytrain,manyview_dev_1,20.508004149463403,10.310151926704073,8.745624247957407,0.4164560185628414,0.0 190 | toytruck,manyview_dev_0,18.495843812347488,9.077851138541167,4.742593752879244,0.8234759152694971,0.0 191 | toytruck,manyview_dev_1,12.550467820571148,5.368998580430165,6.689171662380995,0.581289871598415,0.0 192 | vase,manyview_dev_0,18.188943183563104,9.441252383753767,3.3505357321672142,0.7542355580664746,0.0 193 | vase,manyview_dev_1,18.434184156563,9.303826519080554,6.071437833814365,0.9019223769623579,0.0 194 | MEAN,-,16.092061594428568,7.352673089707325,5.58710387189748,0.635639291857879,0.0 195 | """, 196 | "manyview_test": """ 197 | Category,Subset name,psnr_masked,psnr_fg,psnr_full_image,depth_abs_fg,iou 198 | apple,manyview_test_0,16.22478731544839,6.660985912339718,8.662890866941595,0.5735152991789598,0.0 199 | backpack,manyview_test_0,18.664239087697137,12.092836660079621,3.9911394799946835,0.7187691122198704,0.0 200 | ball,manyview_test_0,17.053273275949497,11.47813547143793,5.494760070704971,0.24760313752451854,0.0 201 | banana,manyview_test_0,19.09250116156104,5.624412642679121,4.915562631182255,0.6388887597635459,0.0 202 | baseballbat,manyview_test_0,17.662719299079523,3.56448996833759,6.856655466723437,0.5858372717711078,0.0 203 | baseballglove,manyview_test_0,15.822024491958919,9.008496845518556,4.958078518403922,0.517665349356982,0.0 204 | bench,manyview_test_0,16.177405149477067,5.64144135201049,6.639758049666188,0.9396015318702626,0.0 205 | bicycle,manyview_test_0,18.929300038845177,8.384269505927424,4.978158575183426,0.7192708133061682,0.0 206 | book,manyview_test_0,14.243260388807064,6.680398318324483,5.9082871869853735,0.9097958583065434,0.0 207 | bottle,manyview_test_0,14.627587579689477,5.485474059329347,5.806882899714011,1.2365226740951725,0.0 208 | bowl,manyview_test_0,12.58297015755071,4.721445807873399,6.174942733659999,0.5651215302382757,0.0 209 | broccoli,manyview_test_0,15.348378477682894,9.138928269423888,6.406522886996562,0.46622630548488525,0.0 210 | 
cake,manyview_test_0,12.406031259153915,9.13497199802905,6.954300602123617,0.7135451548332193,0.0 211 | car,manyview_test_0,10.536444455719398,6.3033794761422826,5.589254154468083,0.6075981188742273,0.0 212 | carrot,manyview_test_0,15.052122330808963,5.001683408210913,6.975324034802911,0.6913476205193215,0.0 213 | cellphone,manyview_test_0,18.548592045129272,5.477199696294225,5.405821575968376,0.8925134146832333,0.0 214 | chair,manyview_test_0,9.288750627933801,5.559044610507649,5.063084903423689,0.5832447059416495,0.0 215 | couch,manyview_test_0,15.542901771081734,10.090205474555033,7.091879909602398,0.530379736402723,0.0 216 | cup,manyview_test_0,14.565042555686277,4.3989084024686305,5.8416712646107225,0.9809843195171222,0.0 217 | donut,manyview_test_0,15.455254561260311,7.186638190791148,6.08943365801032,0.42916104004956795,0.0 218 | frisbee,manyview_test_0,16.030436839496698,8.25580372425949,3.6125508386557295,0.7820506512812717,0.0 219 | hairdryer,manyview_test_0,22.640570140053246,11.702523731191262,4.159711019086314,0.616971255937149,0.0 220 | handbag,manyview_test_0,24.14781075331437,15.091930028917984,5.223221264801334,0.562664145074455,0.0 221 | hotdog,manyview_test_0,12.244917262623947,4.72460505473762,6.9914703226785,0.5147290560374835,0.0 222 | hydrant,manyview_test_0,16.892200853920816,6.5057584631969645,6.307555495359107,0.8690763104982895,0.0 223 | keyboard,manyview_test_0,14.937059706035933,10.816605585432766,4.857196169187754,0.5188802050007122,0.0 224 | kite,manyview_test_0,15.068337896849323,6.205118297721433,5.276287557112783,0.7494832801627337,0.0 225 | laptop,manyview_test_0,14.59345603707514,7.090074167371421,6.2162237610589814,0.7413216109605885,0.0 226 | motorcycle,manyview_test_0,14.442903913583953,8.56222345535462,6.50899995433291,0.7010114811016933,0.0 227 | mouse,manyview_test_0,29.8885518296015,14.145685466149715,5.406173914859613,0.5942925002348606,0.0 228 | orange,manyview_test_0,11.525661011646141,5.745001890928845,5.983235030110308,0.327592487953461,0.0 229 | parkingmeter,manyview_test_0,18.046203929985666,6.461002560728408,5.027716754597319,1.5829406195750064,0.0 230 | pizza,manyview_test_0,15.152783189315754,6.578112135320982,7.482842326935612,0.7078538179251567,0.0 231 | plant,manyview_test_0,20.369369422864448,11.73336728848978,5.490938199184393,0.5563616188902266,0.0 232 | remote,manyview_test_0,21.93996425442841,9.915599775483262,3.2277628694594647,0.8952884887902877,0.0 233 | sandwich,manyview_test_0,14.156122339232516,4.782614236412581,5.172885855269289,0.4726663784145917,0.0 234 | skateboard,manyview_test_0,17.199716318802558,9.3986630162228,6.582697215433262,0.7526901207787688,0.0 235 | suitcase,manyview_test_0,20.5543872349586,15.449636313939182,6.392103915747007,0.5623042520735794,0.0 236 | teddybear,manyview_test_0,15.056483227336162,6.023824258666201,2.385989674021068,0.6859612539860361,0.0 237 | toaster,manyview_test_0,17.538889427176077,10.389092700641873,7.350896986214959,0.6917412312874205,0.0 238 | toilet,manyview_test_0,8.581683038527455,4.304701570881858,5.715072710684154,0.5228074506396895,0.0 239 | toybus,manyview_test_0,13.421701717928093,5.104459961535013,7.832131890256459,0.5177220835646305,0.0 240 | toyplane,manyview_test_0,25.939823270757692,11.015747754038403,5.005751206904976,0.5705696772343116,0.0 241 | toytrain,manyview_test_0,17.831418296523193,7.494011795501741,4.629191510823262,0.6318052729776739,0.0 242 | toytruck,manyview_test_0,20.369297725379987,9.285414438061778,4.844672681479939,0.48828556766453685,0.0 243 | 
umbrella,manyview_test_0,12.752391495654509,6.657169727823324,2.556125460617257,0.428359657679186,0.0 244 | vase,manyview_test_0,20.277671704818363,6.07655429478755,4.941408622390838,0.8391219139438616,0.0 245 | wineglass,manyview_test_0,19.455250191811363,7.197566433072046,6.442702595780869,3.173690609010777,0.0 246 | MEAN,-,16.64330518875463,7.882212795773946,5.6547484431710435,0.7209548906794958,0.0 247 | """ 248 | } 249 | 250 | 251 | for task in [CO3DTask.FEW_VIEW, CO3DTask.MANY_VIEW]: 252 | for seq_set in [CO3DSequenceSet.DEV, CO3DSequenceSet.TEST]: 253 | BLANK_PREDICTION_RESULTS[(task, seq_set)] = _read_result_csv( 254 | CSVs[f"{task.value}_{seq_set.value}"] 255 | ) 256 | -------------------------------------------------------------------------------- /co3d/challenge/co3d_challenge_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/co3d/challenge/co3d_challenge_logo.png -------------------------------------------------------------------------------- /co3d/challenge/data_types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from enum import Enum 9 | import numpy as np 10 | from dataclasses import dataclass 11 | from typing import Optional 12 | 13 | 14 | @dataclass 15 | class RGBDAFrame: 16 | image: np.ndarray 17 | mask: np.ndarray 18 | depth: np.ndarray 19 | depth_mask: Optional[np.ndarray] = None 20 | 21 | 22 | class CO3DTask(Enum): 23 | MANY_VIEW = "manyview" 24 | FEW_VIEW = "fewview" 25 | 26 | 27 | class CO3DSequenceSet(Enum): 28 | TRAIN = "train" 29 | DEV = "dev" 30 | TEST = "test" -------------------------------------------------------------------------------- /co3d/challenge/eval_visuals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/co3d/challenge/eval_visuals.png -------------------------------------------------------------------------------- /co3d/challenge/io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
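A minimal usage sketch (not part of the repository) of the blank-prediction baselines parsed above. It assumes the `co3d` package from this tree is importable; `BLANK_PREDICTION_RESULTS` is keyed by `(CO3DTask, CO3DSequenceSet)` pairs, and each value maps `(category, subset_name)` pairs to the per-metric floats taken from the CSVs.

```
from co3d.challenge.data_types import CO3DTask, CO3DSequenceSet
from co3d.challenge.blank_predictions_results import BLANK_PREDICTION_RESULTS

# metrics obtained for blank predictions on the fewview_dev subsets
blank_fewview_dev = BLANK_PREDICTION_RESULTS[(CO3DTask.FEW_VIEW, CO3DSequenceSet.DEV)]
apple_metrics = blank_fewview_dev[("apple", "fewview_dev")]
print(apple_metrics["psnr_masked"], apple_metrics["depth_abs_fg"])
```

Note that the trailing MEAN row of each CSV is excluded from the parsed dictionary, because `_read_result_csv` keeps only `rows[1:-1]` as data; the dict therefore contains per-category entries only.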
6 | 7 | 8 | import os 9 | import json 10 | import logging 11 | import numpy as np 12 | import dbm 13 | import functools 14 | import h5py 15 | 16 | from io import BytesIO 17 | from PIL import Image 18 | from typing import Optional, Callable, Dict, Union 19 | from tqdm import tqdm 20 | from .data_types import CO3DSequenceSet, CO3DTask, RGBDAFrame 21 | 22 | 23 | logger = logging.getLogger(__file__) 24 | 25 | 26 | def store_rgbda_frame(rgbda_frame: RGBDAFrame, fl: str): 27 | assert np.isfinite(rgbda_frame.depth).all() 28 | store_mask(rgbda_frame.mask[0], fl + "_mask.png") 29 | store_depth(rgbda_frame.depth[0], fl + "_depth.png") 30 | store_image(rgbda_frame.image, fl + "_image.png") 31 | if rgbda_frame.depth_mask is not None: 32 | store_1bit_png_mask(rgbda_frame.depth_mask[0], fl + "depth_mask.png") 33 | 34 | 35 | def link_rgbda_frame_files(fl_existing: str, fl_src_link: str): 36 | for pfix in ["_mask.png", "_depth.png", "_image.png", "_depth_mask.png"]: 37 | link_tgt = fl_existing+pfix 38 | link_src = fl_src_link+pfix 39 | if os.path.islink(link_src): 40 | os.remove(link_src) 41 | elif os.path.isfile(link_src): 42 | raise ValueError(f"Link source {link_src} is an actual file (not a link).") 43 | if not os.path.isfile(link_tgt): 44 | if pfix=="_depth_mask.png": 45 | pass 46 | else: 47 | raise ValueError(f"Target file {link_tgt} does not exist!") 48 | else: 49 | if os.path.islink(link_src): 50 | os.remove(link_src) 51 | os.symlink(link_tgt, link_src) 52 | 53 | 54 | def load_rgbda_frame(fl: str, check_for_depth_mask: bool = False) -> RGBDAFrame: 55 | f = RGBDAFrame( 56 | mask=load_mask(fl + "_mask.png")[None], 57 | depth=load_depth(fl + "_depth.png")[None], 58 | image=load_image(fl + "_image.png"), 59 | ) 60 | if not np.isfinite(f.depth).all(): 61 | f.depth[~np.isfinite(f.depth)] = 0.0 # chuck the infs in depth 62 | if check_for_depth_mask: 63 | depth_mask_path = fl + "_depth_mask.png" 64 | if os.path.isfile(depth_mask_path): 65 | f.depth_mask = load_1bit_png_mask(depth_mask_path)[None] 66 | return f 67 | 68 | 69 | def store_1bit_png_mask(mask: np.ndarray, fl: str): 70 | """ 71 | mask: HxW 72 | """ 73 | Image.fromarray((mask*255).astype('u1'), mode='L').convert('1').save(fl, "PNG") 74 | 75 | 76 | def load_1bit_png_mask(file: str) -> np.ndarray: 77 | with Image.open(_handle_db_file(file)) as pil_im: 78 | mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) 79 | return mask 80 | 81 | 82 | def load_mask(fl: str): 83 | return np.array(Image.open(_handle_db_file(fl))).astype(np.float32) / 255.0 84 | 85 | 86 | def store_mask(mask: np.ndarray, fl: str, mode: str = "L"): 87 | """ 88 | mask: HxW 89 | """ 90 | assert mask.ndim == 2 91 | if mode == "L": 92 | mpil = Image.fromarray((mask * 255.0).astype(np.uint8), mode="L").convert("L") 93 | elif mode == "I;16": 94 | mpil = Image.fromarray((mask * 255.0).astype(np.uint8), mode="I;16").convert( 95 | "I;16" 96 | ) 97 | else: 98 | raise ValueError(mode) 99 | mpil.save(fl, "PNG") 100 | 101 | 102 | def load_depth(fl: str): 103 | depth_pil = Image.open(_handle_db_file(fl)) 104 | depth = ( 105 | np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) 106 | .astype(np.float32) 107 | .reshape((depth_pil.size[1], depth_pil.size[0])) 108 | ) 109 | assert depth.ndim == 2 110 | return depth 111 | 112 | 113 | def store_depth(depth: np.ndarray, fl: str): 114 | assert depth.ndim == 2 115 | depth_uint16 = np.frombuffer(depth.astype(np.float16), dtype=np.uint16).reshape( 116 | depth.shape 117 | ) 118 | Image.fromarray(depth_uint16).save(fl) 119 | 120 
| 121 | def load_image(fl: str): 122 | return np.array(Image.open(_handle_db_file(fl))).astype(np.float32).transpose(2, 0, 1) / 255.0 123 | 124 | 125 | def store_image(image: np.ndarray, fl: str): 126 | assert image.ndim == 3 127 | Image.fromarray((image.transpose(1, 2, 0) * 255.0).astype(np.uint8)).save(fl) 128 | 129 | 130 | def _handle_db_file(fl_or_db_link: str): 131 | """ 132 | In case `fl_or_db_link` is a symlink pointing at an .hdf5 or .dbm database file, 133 | this function returns a BytesIO object yielding the underlying file's binary data. 134 | 135 | Otherwise, the function simply returns `fl_or_db_link`. 136 | """ 137 | 138 | fl_or_bytes_io = fl_or_db_link 139 | for db_format, data_load_fun in ( 140 | (".hdf5", _get_image_data_from_h5), 141 | (".dbm", _get_image_data_from_dbm), 142 | ): 143 | fl_or_bytes_io = _maybe_get_db_image_data_bytes_io_from_file( 144 | fl_or_db_link, 145 | db_format, 146 | data_load_fun, 147 | ) 148 | if not isinstance(fl_or_bytes_io, str): 149 | # logger.info(f"{fl} is {db_format}!") 150 | break 151 | return fl_or_bytes_io 152 | 153 | 154 | def _maybe_get_db_image_data_bytes_io_from_file( 155 | fl_or_db_link: str, 156 | db_format: str, 157 | data_load_fun: Callable, 158 | ) -> Union[str, BytesIO]: 159 | """ 160 | In case `fl_or_db_link` is a symlink pointing at a database file `db_path` with 161 | of type `db_format`, this function calls `data_load_fun(fl_or_db_link, db_path)` 162 | to retrieve a BytesIO object yielding the `fl`s binary data. 163 | 164 | Otherwise, the function simply returns `fl_or_db_link`. 165 | """ 166 | if os.path.islink(fl_or_db_link): 167 | realpath = os.readlink(fl_or_db_link) 168 | if not realpath.endswith(db_format): 169 | return fl_or_db_link 170 | db_path = fl_or_db_link 171 | else: 172 | return fl_or_db_link 173 | return data_load_fun(realpath, db_path) 174 | 175 | 176 | @functools.lru_cache(maxsize=1) 177 | def _cached_dbm_open_for_read(dbmpath: str): 178 | db = dbm.open(dbmpath, "r") 179 | return db 180 | 181 | 182 | def _get_image_data_from_dbm(dbmpath: str, fl: str): 183 | flname = os.path.split(fl)[-1] 184 | db = _cached_dbm_open_for_read(dbmpath) 185 | # with dbm.open(dbmpath, "r") as db: 186 | bin_data = db[flname] 187 | return BytesIO(bin_data) 188 | 189 | 190 | def _get_image_data_from_h5(h5path: str, fl: str): 191 | with h5py.File(h5path, "r") as f: 192 | flname = os.path.split(fl)[-1] 193 | file_index = f["binary_data"].attrs 194 | if flname not in file_index: 195 | raise IndexError(f"{flname} not in {h5path}!") 196 | idx = file_index[flname] 197 | bin_data = f["binary_data"][idx] 198 | return BytesIO(bin_data) 199 | 200 | 201 | def get_category_to_subset_name_list( 202 | dataset_root: str, 203 | task: Optional[CO3DTask] = None, 204 | sequence_set: Optional[CO3DSequenceSet] = None, 205 | ): 206 | """ 207 | Get the mapping from categories to existing subset names. 208 | 209 | Args: 210 | dataset_root: The dataset root folder. 211 | task: CO3D Challenge task. 212 | sequence_set: CO3D Challenge sequence_set. 213 | 214 | Returns: 215 | category_to_subset_name_list: A dict of the following form: 216 | { 217 | category: [subset_name_0, subset_name_1, ...], 218 | ... 
219 | } 220 | """ 221 | 222 | json_file = os.path.join(dataset_root, "category_to_subset_name_list.json") 223 | with open(json_file, "r") as f: 224 | category_to_subset_name_list = json.load(f) 225 | 226 | # filter per-category subset lists by the selected task 227 | if task is not None: 228 | category_to_subset_name_list = { 229 | category: [ 230 | subset_name 231 | for subset_name in subset_name_list 232 | if subset_name.startswith(task.value) 233 | ] 234 | for category, subset_name_list in category_to_subset_name_list.items() 235 | } 236 | 237 | # filter per-category subset lists by the selected sequence set 238 | if sequence_set is not None: 239 | category_to_subset_name_list = { 240 | category: [ 241 | subset_name 242 | for subset_name in subset_name_list 243 | if f"_{sequence_set.value}" in subset_name 244 | ] 245 | for category, subset_name_list in category_to_subset_name_list.items() 246 | } 247 | 248 | # remove the categories with completely empty subset_name_lists 249 | category_to_subset_name_list = { 250 | c: l for c, l in category_to_subset_name_list.items() if len(l) > 0 251 | } 252 | 253 | # sort by category 254 | category_to_subset_name_list = dict(sorted(category_to_subset_name_list.items())) 255 | 256 | return category_to_subset_name_list 257 | 258 | 259 | def load_all_eval_batches( 260 | dataset_root: str, 261 | task: Optional[CO3DTask] = None, 262 | sequence_set: Optional[CO3DSequenceSet] = None, 263 | remove_frame_paths: bool = False, 264 | only_target_frame: bool = True, 265 | ): 266 | """ 267 | Load eval batches files stored in dataset_root into a dictionary: 268 | { 269 | (category, subset_name): eval_batches_index, 270 | ... 271 | } 272 | 273 | Args: 274 | dataset_root: The root of the CO3DV2 dataset. 275 | task: CO3D challenge task. 276 | sequence_set: CO3D challenge sequence set. 277 | remove_frame_paths: If `True`, removes the paths to frames from the loaded 278 | dataset index. 279 | only_target_frame: Loads only the first (evaluation) frame from each eval batch. 280 | 281 | Returns: 282 | eval_batches_dict: Output dictionary. 
283 | """ 284 | 285 | category_to_subset_name_list = get_category_to_subset_name_list( 286 | dataset_root, 287 | task=task, 288 | sequence_set=sequence_set, 289 | ) 290 | 291 | eval_batches_dict = {} 292 | for category, subset_name_list in category_to_subset_name_list.items(): 293 | for subset_name in subset_name_list: 294 | # load the subset eval batches 295 | eval_batches_dict[(category, subset_name)] = _load_eval_batches_file( 296 | dataset_root, 297 | category, 298 | subset_name, 299 | remove_frame_paths=remove_frame_paths, 300 | only_target_frame=only_target_frame, 301 | ) 302 | return eval_batches_dict 303 | 304 | 305 | def _load_eval_batches_file( 306 | dataset_root: str, 307 | category: str, 308 | subset_name: str, 309 | remove_frame_paths: bool = True, 310 | only_target_frame: bool = True, 311 | ): 312 | eval_batches_fl = os.path.join( 313 | dataset_root, 314 | category, 315 | "eval_batches", 316 | f"eval_batches_{subset_name}.json", 317 | ) 318 | with open(eval_batches_fl, "r") as f: 319 | eval_batches = json.load(f) 320 | 321 | if only_target_frame: 322 | eval_batches = [ 323 | b[0] for b in eval_batches 324 | ] # take only the first (target evaluation) frame 325 | 326 | if remove_frame_paths: 327 | eval_batches = [b[:2] for b in eval_batches] 328 | return eval_batches 329 | 330 | 331 | def export_result_file_dict_to_hdf5(h5path: str, filedict: Dict[str, str]): 332 | """ 333 | Export the result files to an hdf5 file that will be sent to the EvalAI server: 334 | 335 | Args: 336 | h5path: Target hdf5 file path. 337 | filedict: Dict in form {relative_file_path: absolute_file_path} 338 | """ 339 | logger.info(f"Exporting {len(filedict)} files to HDF5 file {h5path}.") 340 | if len(filedict)==0: 341 | raise ValueError("No data to export!") 342 | assert h5path.endswith(".hdf5") 343 | if os.path.isfile(h5path): 344 | os.remove(h5path) 345 | os.makedirs(os.path.dirname(h5path), exist_ok=True) 346 | with h5py.File(h5path, "w", libver='latest') as fh5: 347 | dt = h5py.special_dtype(vlen=np.dtype('uint8')) 348 | max_path_len = max(len(p) for p in filedict.keys()) 349 | dset = fh5.create_dataset( 350 | 'binary_data', (len(filedict), ), dtype=dt, compression="gzip" 351 | ) 352 | filepath_dset = fh5.create_dataset( 353 | 'filepaths', 354 | (len(filedict), ), 355 | dtype=h5py.string_dtype('utf-8', max_path_len), 356 | # dtype=np.dtype(f'U{max_path_len}'), 357 | compression="gzip" 358 | ) 359 | index = {} 360 | for idx, (rel_path, store_file) in enumerate(tqdm(filedict.items(), total=len(filedict))): 361 | _store_binary_file_data_to_hd5_dataset(dset, store_file, idx) 362 | flname = os.path.split(rel_path)[-1] 363 | assert flname not in index, "Duplicate filenames!" 364 | index[flname] = idx 365 | filepath_dset[idx] = rel_path 366 | logger.info(f"Updating index of {h5path}.") 367 | dset.attrs.update(index) 368 | 369 | 370 | def make_hdf5_file_links(h5path: str, root: str): 371 | """ 372 | Link all files whose binary data are stored in an HDF5 file `h5path` to 373 | files under the root folder. 374 | 375 | Args: 376 | h5path: HDF5 file. 377 | root: The root folder for exporting symlinks. 
378 | """ 379 | logger.info(f"Making file links in {root} to DB data in {h5path}.") 380 | assert h5path.endswith(".hdf5") 381 | with h5py.File(h5path, "r") as fh5: 382 | filepaths = [f.decode("UTF-8") for f in np.array(fh5["filepaths"])] 383 | file_name_to_tgt_file = { 384 | os.path.split(p)[-1]: os.path.join(root, p) for p in filepaths 385 | } 386 | dset = fh5["binary_data"] 387 | index = dset.attrs 388 | all_dirs = set(os.path.dirname(p) for p in file_name_to_tgt_file.values()) 389 | for dir_ in all_dirs: 390 | os.makedirs(dir_, exist_ok=True) 391 | for flname, _ in tqdm(index.items(), total=len(index)): 392 | tgt_file = file_name_to_tgt_file[flname] 393 | link_file_to_db_file(h5path, tgt_file) 394 | 395 | 396 | def link_file_to_db_file(db_file: str, file: str, overwrite: bool = True): 397 | """ 398 | Make a symlink file->db_file 399 | """ 400 | if db_file.endswith(".hdf5"): 401 | token = "__HDF5__:" 402 | elif db_file.endswith(".dbm"): 403 | token = "__DBM__:" 404 | else: 405 | raise ValueError(db_file) 406 | if overwrite and (os.path.isfile(file) or os.path.islink(file)): 407 | os.remove(file) 408 | os.symlink(db_file, file) 409 | 410 | # symlinks are cleaner ... do not use this anymore: 411 | # with open(file, "w") as f: 412 | # f.write(token+os.path.normpath(os.path.abspath(db_file))) 413 | 414 | 415 | def _store_binary_file_data_to_hd5_dataset(dset, fl: str, idx: int): 416 | with open(fl, "rb") as fin: 417 | binary_data = fin.read() 418 | dset[idx] = np.fromstring(binary_data, dtype='uint8') -------------------------------------------------------------------------------- /co3d/challenge/metric_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import math 8 | import numpy as np 9 | import logging 10 | import time 11 | from typing import Optional 12 | from typing import Tuple 13 | from .data_types import RGBDAFrame 14 | 15 | 16 | EVAL_METRIC_NAMES = ["psnr_masked", "psnr_fg", "psnr_full_image", "depth_abs_fg", "iou"] 17 | EVAL_METRIC_MISSING_VALUE = { 18 | "psnr_masked": 0.0, 19 | "psnr_fg": 0.0, 20 | "psnr_full_image": 0.0, 21 | "depth_abs_fg": 100000.0, 22 | "iou": 0.0, 23 | } 24 | 25 | 26 | logger = logging.getLogger(__file__) 27 | 28 | 29 | def eval_one( 30 | pred: RGBDAFrame, 31 | target: RGBDAFrame, 32 | ): 33 | return eval_one_rgbda( 34 | pred.image, 35 | pred.depth, 36 | pred.mask, 37 | target.image, 38 | target.depth, 39 | target.mask, 40 | gt_depth_mask=target.depth_mask, 41 | ) 42 | 43 | 44 | def eval_one_rgbda( 45 | image_rgb: np.ndarray, 46 | depth_map: np.ndarray, 47 | fg_mask: np.ndarray, 48 | gt_image_rgb: np.ndarray, 49 | gt_depth_map: np.ndarray, 50 | gt_fg_mask: np.ndarray, 51 | gt_depth_mask: Optional[np.ndarray] = None, 52 | crop_around_fg_mask: bool = False, 53 | gt_fg_mask_threshold: Optional[float] = 0.5, 54 | ): 55 | """ 56 | Args: 57 | image_rgb: 3xHxW, black background 58 | depth_map: 1xHxW 59 | fg_mask: 1xHxW in {0, 1} 60 | gt_image_rgb: 3xHxW, black background 61 | gt_depth_map: 1xHxW 62 | gt_fg_mask: 1xHxW in {0, 1} 63 | gt_depth_mask: 1xHxW in {0, 1} 64 | 65 | Returns: 66 | eval_result: a dictionary {metric_name: str: metric_value: float} 67 | """ 68 | 69 | # with Timer("start"): 70 | for xn, x in zip( 71 | ("image_rgb", "fg_mask", "depth_map"), 72 | (image_rgb, fg_mask, depth_map), 73 | ): 74 | if not np.isfinite(x).all(): 75 | raise ValueError(f"Non-finite element in {xn}") 76 | 77 | if gt_fg_mask_threshold is not None: 78 | # threshold the gt mask if note done before 79 | gt_fg_mask = (gt_fg_mask > gt_fg_mask_threshold).astype(np.float32) 80 | 81 | # chuck non-finite depth 82 | gt_depth_map[~np.isfinite(gt_depth_map)] = 0 83 | 84 | if gt_depth_mask is not None: 85 | gt_depth_map = gt_depth_map * gt_depth_mask 86 | 87 | if crop_around_fg_mask: 88 | raise NotImplementedError("") 89 | fg_mask_box_xxyy = _get_bbox_from_mask(gt_fg_mask[0]) 90 | [ 91 | image_rgb, 92 | depth_map, 93 | fg_mask, 94 | gt_image_rgb, 95 | gt_depth_map, 96 | gt_fg_mask, 97 | gt_depth_mask, 98 | ] = [ 99 | x[ 100 | :, 101 | fg_mask_box_xxyy[2]:fg_mask_box_xxyy[3], 102 | fg_mask_box_xxyy[0]:fg_mask_box_xxyy[1], 103 | ] for x in [ 104 | image_rgb, 105 | depth_map, 106 | fg_mask, 107 | gt_image_rgb, 108 | gt_depth_map, 109 | gt_fg_mask, 110 | gt_depth_mask, 111 | ] 112 | ] 113 | 114 | gt_image_rgb_masked = gt_image_rgb * gt_fg_mask 115 | 116 | # with Timer("psnrs"): 117 | psnr_masked = calc_psnr(image_rgb, gt_image_rgb_masked) 118 | 119 | psnr_full_image = calc_psnr(image_rgb, gt_image_rgb) 120 | psnr_fg = calc_psnr(image_rgb, gt_image_rgb_masked, mask=gt_fg_mask) 121 | 122 | # with Timer("depth"): 123 | mse_depth, abs_depth, aux_depth = calc_mse_abs_depth( 124 | depth_map, 125 | gt_depth_map, 126 | gt_fg_mask, 127 | crop=5, 128 | ) 129 | 130 | # with Timer("iou"): 131 | iou = calc_iou(fg_mask, gt_fg_mask) 132 | 133 | return { 134 | "psnr_masked": psnr_masked, 135 | "psnr_fg": psnr_fg, 136 | "psnr_full_image": psnr_full_image, 137 | "depth_abs_fg": abs_depth, 138 | "iou": iou, 139 | } 140 | 141 | 142 | def calc_psnr( 143 | x: np.ndarray, 144 | y: np.ndarray, 145 | mask: Optional[np.ndarray] = None, 146 | ) -> np.float32: 147 | """ 148 | Calculates the Peak-signal-to-noise ratio between tensors `x` and `y`. 
149 | """ 150 | mse = calc_mse(x, y, mask=mask) 151 | psnr = np.log10(np.clip(mse, 1e-10, None)) * (-10.0) 152 | return psnr 153 | 154 | 155 | def calc_mse( 156 | x: np.ndarray, 157 | y: np.ndarray, 158 | mask: Optional[np.ndarray] = None, 159 | ) -> np.float32: 160 | """ 161 | Calculates the mean square error between tensors `x` and `y`. 162 | """ 163 | if mask is None: 164 | return np.mean((x - y) ** 2) 165 | else: 166 | mask_expand = np.broadcast_to(mask, x.shape) 167 | return (((x - y) ** 2) * mask_expand).sum() / np.clip( 168 | mask_expand.sum(), 1e-5, None 169 | ) 170 | 171 | 172 | def rgb_l1( 173 | pred: np.ndarray, target: np.ndarray, mask: Optional[np.ndarray] = None 174 | ) -> np.float32: 175 | """ 176 | Calculates the mean absolute error between the predicted colors `pred` 177 | and ground truth colors `target`. 178 | """ 179 | if mask is None: 180 | mask = np.ones_like(pred[:1]) 181 | return (np.abs(pred - target) * mask).sum() / np.clip(mask.sum(), 1, None) 182 | 183 | 184 | def calc_mse_abs_depth( 185 | pred: np.ndarray, 186 | target: np.ndarray, 187 | mask: np.ndarray, 188 | crop: int, 189 | get_best_scale: bool = True, 190 | best_scale_clamp_thr: float = 1e-4, 191 | ) -> np.float32: 192 | 193 | # crop 194 | if crop > 0: 195 | target = target[:, crop:-crop, crop:-crop] 196 | pred = pred[:, crop:-crop, crop:-crop] 197 | mask = mask[:, crop:-crop, crop:-crop] 198 | 199 | target = target * mask 200 | dmask = (target > 0.0).astype(np.float32) 201 | dmask_mass = np.clip(dmask.sum(), 1e-4, None) 202 | 203 | scale_l1 = scale_l2 = None 204 | for l_norm in ["l1", "l2"]: 205 | if get_best_scale: 206 | # mult preds by a scalar "scale_best" 207 | # s.t. we get best possible mse error 208 | _optimal_scale = { 209 | "l1": _optimal_l1_scale, 210 | "l2": _optimal_l2_scale, 211 | }[l_norm] 212 | scale_best = _optimal_scale( 213 | pred * dmask, target * dmask, best_scale_clamp_thr 214 | ) 215 | pred_scaled = pred * scale_best 216 | if l_norm=="l1": 217 | scale_l1 = scale_best 218 | elif l_norm=="l2": 219 | scale_l2 = scale_best 220 | else: 221 | raise ValueError(l_norm) 222 | else: 223 | pred_scaled = pred 224 | 225 | df = target - pred_scaled 226 | 227 | if l_norm=="l1": 228 | abs_depth = (dmask * np.abs(df)).sum() / dmask_mass 229 | elif l_norm=="l2": 230 | mse_depth = (dmask * (df ** 2)).sum() / dmask_mass 231 | else: 232 | raise ValueError(l_norm) 233 | 234 | return mse_depth, abs_depth, {"scale_l1": scale_l1, "scale_l2": scale_l2} 235 | 236 | 237 | def _optimal_l2_scale(pred, gt, clamp_thr): 238 | """ 239 | Return the scale s that minimizes ||gt - s pred||^2. 240 | The inverse scale is clamped to [eps, Inf] 241 | """ 242 | xy = pred * gt 243 | xx = pred * pred 244 | scale_best = xy.mean() / np.clip(xx.mean(), clamp_thr, None) 245 | return scale_best 246 | 247 | 248 | def _optimal_l1_scale(pred, gt, clamp_thr): 249 | """ 250 | Return the scale s that minimizes |gt - s pred|_1. 251 | The scale is clamped in [-max_scale, max_scale]. 252 | This function operates along the specified axis. 
253 | """ 254 | max_scale = 1 / clamp_thr 255 | x, y = pred.reshape(-1), gt.reshape(-1) 256 | pivots = y / np.clip(x, 1e-10, None) 257 | perm = np.argsort(pivots) 258 | pivots = pivots[perm] 259 | x_sorted = x[perm] 260 | score = -np.abs(x).sum() + 2 * np.cumsum(np.abs(x_sorted)) 261 | # find the index of first positive score 262 | i = (score <= 0).astype(np.float32).sum().astype(np.int64) 263 | # i = torch.unsqueeze(i, dim) 264 | if i >= len(pivots.reshape(-1)): 265 | # logger.warning("Scale outside of bounds!") 266 | return 1.0 267 | else: 268 | scale = pivots[i] 269 | scale = np.clip(scale, -max_scale, max_scale) 270 | # scale = torch.take_along_dim(pivots, i, dim=dim) 271 | # scale = torch.clip(scale, min=-max_scale, max=max_scale) 272 | # outshape = [s for si, s in enumerate(y.shape) if si != dim] 273 | # scale = scale.view(outshape) 274 | return float(scale) 275 | 276 | 277 | 278 | def calc_iou( 279 | predict: np.ndarray, 280 | target: np.ndarray, 281 | mask: Optional[np.ndarray] = None, 282 | threshold: Optional[float] = 0.5, 283 | ) -> np.float32: 284 | """ 285 | This is a great loss because it emphasizes on the active 286 | regions of the predict and targets 287 | """ 288 | if threshold is not None: 289 | predict = (predict >= threshold).astype(np.float32) 290 | target = (target >= threshold).astype(np.float32) 291 | if mask is not None: 292 | predict = predict * mask 293 | target = target * mask 294 | intersect = (predict * target).sum() 295 | union = (predict + target - predict * target).sum() + 1e-4 296 | return intersect / union 297 | 298 | 299 | def _get_bbox_from_mask( 300 | mask: np.ndarray, 301 | box_crop_context: float = 0.1, 302 | thr: float = 0.5, 303 | decrease_quant: float = 0.05, 304 | ) -> Tuple[int, int, int, int]: 305 | # bbox in xywh 306 | masks_for_box = np.zeros_like(mask) 307 | while masks_for_box.sum() <= 1.0: 308 | masks_for_box = (mask > thr).astype(np.float32) 309 | thr -= decrease_quant 310 | assert thr > 0.0 311 | x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) 312 | y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) 313 | h, w = y1 - y0 + 1, x1 - x0 + 1 314 | if box_crop_context > 0.0: 315 | c = box_crop_context 316 | x0 -= w * c / 2 317 | y0 -= h * c / 2 318 | h += h * c 319 | w += w * c 320 | x1 = x0 + w 321 | y1 = y0 + h 322 | x0, x1 = [np.clip(x_, 0, mask.shape[1]) for x_ in [x0, x1]] 323 | y0, y1 = [np.clip(y_, 0, mask.shape[0]) for y_ in [y0, y1]] 324 | return np.round(np.array(x0, x1, y0, y1)).astype(int).tolist() 325 | 326 | 327 | def _get_1d_bounds(arr: np.ndarray) -> Tuple[int, int]: 328 | nz = np.flatnonzero(arr) 329 | return nz[0], nz[-1] 330 | 331 | 332 | class Timer: 333 | def __init__(self, name=None): 334 | self.name = name if name is not None else "timer" 335 | 336 | def __enter__(self): 337 | self.start = time.time() 338 | 339 | def __exit__(self, exc_type, exc_value, traceback): 340 | logger.info(f"{self.name} - {time.time() - self.start:.3e} sec") -------------------------------------------------------------------------------- /co3d/challenge/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import os 8 | import zipfile 9 | import glob 10 | import logging 11 | import multiprocessing 12 | import numpy as np 13 | import time 14 | 15 | from tqdm import tqdm 16 | from collections import defaultdict 17 | from typing import List, Dict, Tuple 18 | from .data_types import CO3DSequenceSet, CO3DTask, RGBDAFrame 19 | from .metric_utils import eval_one, EVAL_METRIC_NAMES, Timer 20 | from .io import load_rgbda_frame 21 | 22 | 23 | logger = logging.getLogger(__file__) 24 | 25 | 26 | def get_co3d_task_from_subset_name(subset_name: str) -> CO3DTask: 27 | if subset_name.startswith("manyview"): 28 | return CO3DTask.MANY_VIEW 29 | elif subset_name.startswith("fewview"): 30 | return CO3DTask.FEW_VIEW 31 | else: 32 | raise ValueError(f"Invalid subset name {subset_name}!") 33 | 34 | 35 | def get_co3d_sequence_set_from_subset_name(subset_name: str) -> CO3DSequenceSet: 36 | return CO3DSequenceSet(subset_name.split("_")[1]) 37 | 38 | 39 | def unzip(file_path: str, output_dir: str): 40 | with zipfile.ZipFile(file_path, "r") as zip_ref: 41 | zip_ref.extractall(output_dir) 42 | 43 | 44 | def check_user_submission_file_paths( 45 | ground_truth_files: Dict[str, str], 46 | user_submission_files: Dict[str, str], 47 | ): 48 | missing_gt_examples = [ 49 | gt_example_name 50 | for gt_example_name in ground_truth_files 51 | if gt_example_name not in user_submission_files 52 | ] 53 | if len(missing_gt_examples) > 0: 54 | raise ValueError( 55 | f"There are missing evaluation examples: {str(missing_gt_examples)}" 56 | ) 57 | 58 | additional_user_examples = [ 59 | user_example 60 | for user_example in user_submission_files 61 | if user_example not in ground_truth_files 62 | ] 63 | if len(additional_user_examples) > 0: 64 | raise ValueError( 65 | f"Unexpected submitted evaluation examples {str(additional_user_examples)}" 66 | ) 67 | 68 | 69 | def get_data_type_postfix(data_type: str): 70 | assert data_type in ["image", "mask", "depth", "depth_mask"] 71 | return f"_{data_type}.png" 72 | 73 | 74 | def get_result_directory_file_names( 75 | result_dir: str, has_depth_masks: bool = False, 76 | ) -> Dict[str, str]: 77 | """ 78 | Result directory structure: 79 | -image.png 80 | -mask.png 81 | -depth.png 82 | ... 
83 | 84 | Returns: 85 | result_files: dict {test_example_name_i: root_path_i} 86 | """ 87 | 88 | result_type_files = {} 89 | for result_type in ("image", "mask", "depth"): 90 | postfix = get_data_type_postfix(result_type) 91 | matching_files = sorted(glob.glob(os.path.join(result_dir, f"*{postfix}"))) 92 | if has_depth_masks and result_type=="mask": 93 | matching_files = [ 94 | f for f in matching_files 95 | if not f.endswith(get_data_type_postfix("depth_mask")) 96 | ] 97 | result_type_files[result_type] = { 98 | os.path.split(f)[-1][: -len(postfix)]: f for f in matching_files 99 | } 100 | 101 | example_names = sorted( 102 | list( 103 | set( 104 | [ 105 | n 106 | for t in ("image", "mask", "depth") 107 | for n in result_type_files[t].keys() 108 | ] 109 | ) 110 | ) 111 | ) 112 | 113 | missing_examples = defaultdict(list) 114 | for example_name in example_names: 115 | for result_type in ("image", "mask", "depth"): 116 | if example_name not in result_type_files[result_type]: 117 | missing_examples[example_name].append(result_type) 118 | 119 | if len(missing_examples) > 0: 120 | msg = "\n".join( 121 | [f" {k} missing {str(v)}" for k, v in missing_examples.items()] 122 | ) 123 | raise ValueError( 124 | f"Some evaluation examples in {result_dir} are incomplete:\n" 125 | + msg 126 | ) 127 | 128 | result_files = { 129 | example_name: result_type_files["image"][example_name][: -len("_image.png")] 130 | for example_name in example_names 131 | } 132 | 133 | return result_files 134 | 135 | def _evaluate_pred_gt_pair(args: Tuple[str, str, str, float, bool]): 136 | gt_example, gt_file, pred_file, max_time, print_status = args 137 | cur_time = time.time() 138 | if cur_time > max_time: 139 | raise ValueError( 140 | " @@@@@@@@@@@@@@@@@@@@@\n" 141 | " Evaluation timed out!\n" 142 | " @@@@@@@@@@@@@@@@@@@@@" 143 | ) 144 | # with Timer("io"): 145 | gt_rgbda = load_rgbda_frame(gt_file, check_for_depth_mask=True) 146 | pred_rgbda = load_rgbda_frame(pred_file) 147 | # with Timer("check"): 148 | check_same_rgbda_sizes(gt_rgbda, pred_rgbda, gt_example) 149 | # with Timer("eval"): 150 | eval_result_one = eval_one(pred_rgbda, gt_rgbda) 151 | for k, v in eval_result_one.items(): 152 | if not np.isfinite(v): 153 | raise ValueError(f"{gt_example} - {k} is does not have a finite value.") 154 | if print_status: 155 | msg = "; ".join([f"{k}={v:.3f}" for k, v in eval_result_one.items()]) 156 | sz = str(list(gt_rgbda.image.shape[-2:])).replace(" ", "") 157 | logger.info( 158 | f"eval_one({gt_example}-[{sz}]): {msg}; {max_time-cur_time:.1f} sec left" 159 | ) 160 | return eval_result_one 161 | 162 | 163 | def evaluate_file_folders( 164 | pred_folder: str, 165 | gt_folder: str, 166 | num_workers: int = 0, 167 | remaining_time: float = float("Inf"), 168 | print_per_example_results: bool = True, 169 | ): 170 | # determine how much time do we have for the evaluation 171 | max_time = time.time() + remaining_time 172 | 173 | user_submission_files = get_result_directory_file_names(pred_folder) 174 | ground_truth_files = get_result_directory_file_names(gt_folder, has_depth_masks=True) 175 | 176 | logger.info(f"Evaluating folders: prediction={pred_folder}; gt={gt_folder}") 177 | check_user_submission_file_paths( 178 | ground_truth_files, 179 | user_submission_files, 180 | ) 181 | 182 | # At this point we are sure that ground_truth_files contain the same 183 | # examples as user_submission_files. 
184 | 185 | if num_workers <= 0: 186 | # Iterate over the gt examples: 187 | per_example_results = [ 188 | _evaluate_pred_gt_pair( 189 | ( 190 | gt_example, 191 | ground_truth_files[gt_example], 192 | user_submission_files[gt_example], 193 | max_time, 194 | print_per_example_results, 195 | ) 196 | ) for gt_example in tqdm(list(ground_truth_files)) 197 | ] 198 | # gt_rgbda = load_rgbda_frame(ground_truth_files[gt_example], check_for_depth_mask=True) 199 | # pred_rgbda = load_rgbda_frame(user_submission_files[gt_example]) 200 | # check_same_rgbda_sizes(gt_rgbda, pred_rgbda, gt_example) 201 | # per_example_results.append(eval_one(pred_rgbda, gt_rgbda)) 202 | else: 203 | # parallel processing 204 | arg_list = [ 205 | ( 206 | gt_example, 207 | ground_truth_files[gt_example], 208 | user_submission_files[gt_example], 209 | max_time, 210 | print_per_example_results, 211 | ) for gt_example in list(ground_truth_files) 212 | ] 213 | pool = multiprocessing.Pool(num_workers) 214 | per_example_results = [ 215 | result for result in tqdm( 216 | pool.imap(_evaluate_pred_gt_pair, arg_list), 217 | total=len(arg_list), 218 | ) 219 | ] 220 | pool.terminate() 221 | 222 | result = { 223 | metric: (sum(r[metric] for r in per_example_results) / len(per_example_results)) 224 | for metric in EVAL_METRIC_NAMES 225 | } 226 | 227 | return result, per_example_results 228 | 229 | 230 | def check_same_rgbda_sizes(gt: RGBDAFrame, pred: RGBDAFrame, example_name: str): 231 | for data_type in ("image", "mask", "depth"): 232 | gt_size, pred_size = [getattr(x, data_type).shape for x in [gt, pred]] 233 | if gt_size != pred_size: 234 | raise ValueError( 235 | f"{example_name}'s size does not match the ground truth." 236 | f"{data_type} size: {str(gt_size)} != {str(pred_size)}" 237 | " (ground-truth vs. prediction)." 238 | ) 239 | return True 240 | 241 | 242 | def get_annotations_folder(phase_codename: str): 243 | assert phase_codename in {"dev", "test"} 244 | return os.path.join("annotations", phase_codename) -------------------------------------------------------------------------------- /co3d/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /co3d/dataset/check_checksum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
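A small sketch (not part of the repository) of the subset-name helpers defined above; the subset names are examples of the naming scheme used by the challenge (`<task>_<sequence_set>[_<index>]`).

```
from co3d.challenge.utils import (
    get_co3d_task_from_subset_name,
    get_co3d_sequence_set_from_subset_name,
)

for subset_name in ("manyview_dev_0", "fewview_test"):
    task = get_co3d_task_from_subset_name(subset_name)
    seq_set = get_co3d_sequence_set_from_subset_name(subset_name)
    print(subset_name, "->", task, seq_set)
# manyview_dev_0 -> CO3DTask.MANY_VIEW CO3DSequenceSet.DEV
# fewview_test -> CO3DTask.FEW_VIEW CO3DSequenceSet.TEST
```

`evaluate_file_folders` above builds on these per-example utilities: it pairs up prediction and ground-truth files, evaluates each pair with `eval_one`, and averages the metrics over the whole prediction folder.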
6 | 7 | 8 | import os 9 | import glob 10 | import argparse 11 | import hashlib 12 | import json 13 | 14 | from typing import Optional 15 | from multiprocessing import Pool 16 | from tqdm import tqdm 17 | 18 | 19 | DEFAULT_SHA256S_FILE = os.path.join(__file__.rsplit(os.sep, 2)[0], "co3d_sha256.json") 20 | BLOCKSIZE = 65536 21 | 22 | 23 | def main( 24 | download_folder: str, 25 | sha256s_file: str, 26 | dump: bool = False, 27 | n_sha256_workers: int = 4, 28 | single_sequence_subset: bool = False, 29 | ): 30 | if not os.path.isfile(sha256s_file): 31 | raise ValueError(f"The SHA256 file does not exist ({sha256s_file}).") 32 | 33 | expected_sha256s = get_expected_sha256s( 34 | sha256s_file=sha256s_file, 35 | single_sequence_subset=single_sequence_subset, 36 | ) 37 | 38 | zipfiles = sorted(glob.glob(os.path.join(download_folder, "*.zip"))) 39 | print(f"Extracting SHA256 hashes for {len(zipfiles)} files in {download_folder}.") 40 | extracted_sha256s_list = [] 41 | with Pool(processes=n_sha256_workers) as sha_pool: 42 | for extracted_hash in tqdm( 43 | sha_pool.imap(_sha256_file_and_print, zipfiles), 44 | total=len(zipfiles), 45 | ): 46 | extracted_sha256s_list.append(extracted_hash) 47 | pass 48 | 49 | extracted_sha256s = dict( 50 | zip([os.path.split(z)[-1] for z in zipfiles], extracted_sha256s_list) 51 | ) 52 | 53 | if dump: 54 | print(extracted_sha256s) 55 | with open(sha256s_file, "w") as f: 56 | json.dump(extracted_sha256s, f, indent=2) 57 | 58 | 59 | missing_keys, invalid_keys = [], [] 60 | for k in expected_sha256s.keys(): 61 | if k not in extracted_sha256s: 62 | print(f"{k} missing!") 63 | missing_keys.append(k) 64 | elif expected_sha256s[k] != extracted_sha256s[k]: 65 | print( 66 | f"'{k}' does not match!" 67 | + f" ({expected_sha256s[k]} != {extracted_sha256s[k]})" 68 | ) 69 | invalid_keys.append(k) 70 | if len(invalid_keys) + len(missing_keys) > 0: 71 | raise ValueError( 72 | f"Checksum checker failed!" 73 | + f" Non-matching checksums: {str(invalid_keys)};" 74 | + f" missing files: {str(missing_keys)}." 
75 | ) 76 | 77 | 78 | def get_expected_sha256s( 79 | sha256s_file: str, 80 | single_sequence_subset: bool = False, 81 | ): 82 | with open(sha256s_file, "r") as f: 83 | expected_sha256s = json.load(f) 84 | if single_sequence_subset: 85 | return expected_sha256s["singlesequence"] 86 | else: 87 | return expected_sha256s["full"] 88 | 89 | 90 | def check_co3d_sha256( 91 | path: str, 92 | sha256s_file: str, 93 | expected_sha256s: Optional[dict] = None, 94 | single_sequence_subset: bool = False, 95 | do_assertion: bool = True, 96 | ): 97 | zipname = os.path.split(path)[-1] 98 | if expected_sha256s is None: 99 | expected_sha256s = get_expected_sha256s( 100 | sha256s_file=sha256s_file, 101 | single_sequence_subset=single_sequence_subset, 102 | ) 103 | extracted_hash = sha256_file(path) 104 | if do_assertion: 105 | assert ( 106 | extracted_hash == expected_sha256s[zipname] 107 | ), f"{zipname}: ({extracted_hash} != {expected_sha256s[zipname]})" 108 | else: 109 | return extracted_hash == expected_sha256s[zipname] 110 | 111 | 112 | def sha256_file(path: str): 113 | sha256_hash = hashlib.sha256() 114 | with open(path, "rb") as f: 115 | file_buffer = f.read(BLOCKSIZE) 116 | while len(file_buffer) > 0: 117 | sha256_hash.update(file_buffer) 118 | file_buffer = f.read(BLOCKSIZE) 119 | digest_ = sha256_hash.hexdigest() 120 | # print(f"{digest_} {path}") 121 | return digest_ 122 | 123 | 124 | def _sha256_file_and_print(path: str): 125 | digest_ = sha256_file(path) 126 | print(f"{path}: {digest_}") 127 | return digest_ 128 | 129 | 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser( 133 | description="Check SHA256 hashes of the CO3D dataset." 134 | ) 135 | parser.add_argument( 136 | "--download_folder", 137 | type=str, 138 | help="A local target folder for downloading the the dataset files.", 139 | ) 140 | parser.add_argument( 141 | "--sha256s_file", 142 | type=str, 143 | help="A local target folder for downloading the the dataset files.", 144 | default=DEFAULT_SHA256S_FILE, 145 | ) 146 | parser.add_argument( 147 | "--num_workers", 148 | type=int, 149 | default=4, 150 | help="The number of sha256 extraction workers.", 151 | ) 152 | parser.add_argument( 153 | "--dump_sha256s", 154 | action="store_true", 155 | help="Store sha256s hashes.", 156 | ) 157 | parser.add_argument( 158 | "--single_sequence_subset", 159 | action="store_true", 160 | default=False, 161 | help="Check the single-sequence subset of the dataset.", 162 | ) 163 | 164 | args = parser.parse_args() 165 | main( 166 | str(args.download_folder), 167 | dump=bool(args.dump_sha256s), 168 | n_sha256_workers=int(args.num_workers), 169 | single_sequence_subset=bool(args.single_sequence_subset), 170 | sha256s_file=str(args.sha256s_file), 171 | ) 172 | -------------------------------------------------------------------------------- /co3d/dataset/data_types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
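A minimal programmatic sketch (not part of the repository) of the checksum helpers above. `zip_path` is a placeholder and must point to one of the downloaded CO3D archives so that its file name matches an entry in `co3d_sha256.json`; a whole download folder can instead be checked through the script's argparse entry point (e.g. `python -m co3d.dataset.check_checksum --download_folder <folder>`, assuming the `co3d` package is on the path).

```
import os
from co3d.dataset.check_checksum import (
    DEFAULT_SHA256S_FILE,
    get_expected_sha256s,
    sha256_file,
)

expected = get_expected_sha256s(sha256s_file=DEFAULT_SHA256S_FILE)  # full-dataset hashes
zip_path = "/path/to/co3d_download/some_archive.zip"  # placeholder: one downloaded .zip
digest = sha256_file(zip_path)
zipname = os.path.split(zip_path)[-1]
print(zipname, "OK" if expected.get(zipname) == digest else "MISMATCH")
```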
6 | 7 | import sys 8 | import dataclasses 9 | import gzip 10 | import json 11 | from dataclasses import dataclass, Field, MISSING 12 | from typing import Any, cast, Dict, IO, Optional, Tuple, Type, TypeVar, Union 13 | 14 | import numpy as np 15 | 16 | 17 | if sys.version_info >= (3, 8, 0): 18 | from typing import get_args, get_origin 19 | elif sys.version_info >= (3, 7, 0): 20 | 21 | def get_origin(cls): # pragma: no cover 22 | return getattr(cls, "__origin__", None) 23 | 24 | def get_args(cls): # pragma: no cover 25 | return getattr(cls, "__args__", None) 26 | 27 | 28 | else: 29 | raise ImportError("This module requires Python 3.7+") 30 | 31 | 32 | _X = TypeVar("_X") 33 | 34 | TF3 = Tuple[float, float, float] 35 | 36 | 37 | @dataclass 38 | class ImageAnnotation: 39 | # path to jpg file, relative w.r.t. dataset_root 40 | path: str 41 | # H x W 42 | size: Tuple[int, int] # TODO: rename size_hw? 43 | 44 | 45 | @dataclass 46 | class DepthAnnotation: 47 | # path to png file, relative w.r.t. dataset_root, storing `depth / scale_adjustment` 48 | path: str 49 | # a factor to convert png values to actual depth: `depth = png * scale_adjustment` 50 | scale_adjustment: float 51 | # path to png file, relative w.r.t. dataset_root, storing binary `depth` mask 52 | mask_path: Optional[str] 53 | 54 | 55 | @dataclass 56 | class MaskAnnotation: 57 | # path to png file storing (Prob(fg | pixel) * 255) 58 | path: str 59 | # (soft) number of pixels in the mask; sum(Prob(fg | pixel)) 60 | mass: Optional[float] = None 61 | 62 | 63 | @dataclass 64 | class ViewpointAnnotation: 65 | # In right-multiply (PyTorch3D) format. X_cam = X_world @ R + T 66 | R: Tuple[TF3, TF3, TF3] 67 | T: TF3 68 | 69 | focal_length: Tuple[float, float] 70 | principal_point: Tuple[float, float] 71 | 72 | intrinsics_format: str = "ndc_norm_image_bounds" 73 | # Defines the co-ordinate system where focal_length and principal_point live. 74 | # Possible values: ndc_isotropic | ndc_norm_image_bounds (default) 75 | # ndc_norm_image_bounds: legacy PyTorch3D NDC format, where image boundaries 76 | # correspond to [-1, 1] x [-1, 1], and the scale along x and y may differ 77 | # ndc_isotropic: PyTorch3D 0.5+ NDC convention where the shorter side has 78 | # the range [-1, 1], and the longer one has the range [-s, s]; s >= 1, 79 | # where s is the aspect ratio. The scale is same along x and y. 80 | 81 | 82 | @dataclass 83 | class FrameAnnotation: 84 | """A dataclass used to load annotations from json.""" 85 | 86 | # can be used to join with `SequenceAnnotation` 87 | sequence_name: str 88 | # 0-based, continuous frame number within sequence 89 | frame_number: int 90 | # timestamp in seconds from the video start 91 | frame_timestamp: float 92 | 93 | image: ImageAnnotation 94 | depth: Optional[DepthAnnotation] = None 95 | mask: Optional[MaskAnnotation] = None 96 | viewpoint: Optional[ViewpointAnnotation] = None 97 | meta: Optional[Dict[str, Any]] = None 98 | 99 | 100 | @dataclass 101 | class PointCloudAnnotation: 102 | # path to ply file with points only, relative w.r.t. dataset_root 103 | path: str 104 | # the bigger the better 105 | quality_score: float 106 | n_points: Optional[int] 107 | 108 | 109 | @dataclass 110 | class VideoAnnotation: 111 | # path to the original video file, relative w.r.t. 
dataset_root
112 |     path: str
113 |     # length of the video in seconds
114 |     length: float
115 | 
116 | 
117 | @dataclass
118 | class SequenceAnnotation:
119 |     sequence_name: str
120 |     category: str
121 |     video: Optional[VideoAnnotation] = None
122 |     point_cloud: Optional[PointCloudAnnotation] = None
123 |     # the bigger the better
124 |     viewpoint_quality_score: Optional[float] = None
125 | 
126 | 
127 | def dump_dataclass(obj: Any, f: IO, binary: bool = False) -> None:
128 |     """
129 |     Args:
130 |         f: A file opened for writing.
131 |         obj: A @dataclass or collection hierarchy including dataclasses.
132 |         binary: Set to True if `f` is opened in binary mode, else False.
133 |     """
134 |     if binary:
135 |         f.write(json.dumps(_asdict_rec(obj)).encode("utf8"))
136 |     else:
137 |         json.dump(_asdict_rec(obj), f)
138 | 
139 | 
140 | def load_dataclass(f: IO, cls: Type[_X], binary: bool = False) -> _X:
141 |     """
142 |     Loads a @dataclass or collection hierarchy including dataclasses
143 |     from a json recursively.
144 |     Call it like load_dataclass(f, typing.List[FrameAnnotation]).
145 |     Raises KeyError if the json has keys not mapping to the dataclass fields.
146 | 
147 |     Args:
148 |         f: A file opened for reading.
149 |         cls: The class of the loaded dataclass.
150 |         binary: Set to True if `f` is opened in binary mode, else False.
151 |     """
152 |     if binary:
153 |         asdict = json.loads(f.read().decode("utf8"))
154 |     else:
155 |         asdict = json.load(f)
156 | 
157 |     if isinstance(asdict, list):
158 |         # in the list case, run a faster "vectorized" version
159 |         cls = get_args(cls)[0]
160 |         res = list(_dataclass_list_from_dict_list(asdict, cls))
161 |     else:
162 |         res = _dataclass_from_dict(asdict, cls)
163 | 
164 |     return res
165 | 
166 | 
167 | def _dataclass_list_from_dict_list(dlist, typeannot):
168 |     """
169 |     Vectorised version of `_dataclass_from_dict`.
170 |     The output should be equivalent to
171 |     `[_dataclass_from_dict(d, typeannot) for d in dlist]`.
172 | 
173 |     Args:
174 |         dlist: list of objects to convert.
175 |         typeannot: type of each of those objects.
176 |     Returns:
177 |         iterator or list over converted objects of the same length as `dlist`.
178 | 
179 |     Raises:
180 |         ValueError: it assumes the objects have None's in consistent places across
181 |             objects, otherwise it would ignore some values. This generally holds for
182 |             auto-generated annotations, but otherwise use `_dataclass_from_dict`.
183 | """ 184 | 185 | cls = get_origin(typeannot) or typeannot 186 | 187 | if typeannot is Any: 188 | return dlist 189 | if all(obj is None for obj in dlist): # 1st recursion base: all None nodes 190 | return dlist 191 | if any(obj is None for obj in dlist): 192 | # filter out Nones and recurse on the resulting list 193 | idx_notnone = [(i, obj) for i, obj in enumerate(dlist) if obj is not None] 194 | idx, notnone = zip(*idx_notnone) 195 | converted = _dataclass_list_from_dict_list(notnone, typeannot) 196 | res = [None] * len(dlist) 197 | for i, obj in zip(idx, converted): 198 | res[i] = obj 199 | return res 200 | 201 | is_optional, contained_type = _resolve_optional(typeannot) 202 | if is_optional: 203 | return _dataclass_list_from_dict_list(dlist, contained_type) 204 | 205 | # otherwise, we dispatch by the type of the provided annotation to convert to 206 | if issubclass(cls, tuple) and hasattr(cls, "_fields"): # namedtuple 207 | # For namedtuple, call the function recursively on the lists of corresponding keys 208 | types = cls._field_types.values() 209 | dlist_T = zip(*dlist) 210 | res_T = [ 211 | _dataclass_list_from_dict_list(key_list, tp) 212 | for key_list, tp in zip(dlist_T, types) 213 | ] 214 | return [cls(*converted_as_tuple) for converted_as_tuple in zip(*res_T)] 215 | elif issubclass(cls, (list, tuple)): 216 | # For list/tuple, call the function recursively on the lists of corresponding positions 217 | types = get_args(typeannot) 218 | if len(types) == 1: # probably List; replicate for all items 219 | types = types * len(dlist[0]) 220 | dlist_T = zip(*dlist) 221 | res_T = ( 222 | _dataclass_list_from_dict_list(pos_list, tp) 223 | for pos_list, tp in zip(dlist_T, types) 224 | ) 225 | if issubclass(cls, tuple): 226 | return list(zip(*res_T)) 227 | else: 228 | return [cls(converted_as_tuple) for converted_as_tuple in zip(*res_T)] 229 | elif issubclass(cls, dict): 230 | # For the dictionary, call the function recursively on concatenated keys and vertices 231 | key_t, val_t = get_args(typeannot) 232 | all_keys_res = _dataclass_list_from_dict_list( 233 | [k for obj in dlist for k in obj.keys()], key_t 234 | ) 235 | all_vals_res = _dataclass_list_from_dict_list( 236 | [k for obj in dlist for k in obj.values()], val_t 237 | ) 238 | indices = np.cumsum([len(obj) for obj in dlist]) 239 | assert indices[-1] == len(all_keys_res) 240 | 241 | keys = np.split(list(all_keys_res), indices[:-1]) 242 | # vals = np.split(all_vals_res, indices[:-1]) 243 | all_vals_res_iter = iter(all_vals_res) 244 | return [cls(zip(k, all_vals_res_iter)) for k in keys] 245 | elif not dataclasses.is_dataclass(typeannot): 246 | return dlist 247 | 248 | # dataclass node: 2nd recursion base; call the function recursively on the lists 249 | # of the corresponding fields 250 | assert dataclasses.is_dataclass(cls) 251 | fieldtypes = { 252 | f.name: (_unwrap_type(f.type), _get_dataclass_field_default(f)) 253 | for f in dataclasses.fields(typeannot) 254 | } 255 | 256 | # NOTE the default object is shared here 257 | key_lists = ( 258 | _dataclass_list_from_dict_list([obj.get(k, default) for obj in dlist], type_) 259 | for k, (type_, default) in fieldtypes.items() 260 | ) 261 | transposed = zip(*key_lists) 262 | return [cls(*vals_as_tuple) for vals_as_tuple in transposed] 263 | 264 | 265 | def _dataclass_from_dict(d, typeannot): 266 | if d is None or typeannot is Any: 267 | return d 268 | is_optional, contained_type = _resolve_optional(typeannot) 269 | if is_optional: 270 | # an Optional not set to None, just use the 
contents of the Optional. 271 | return _dataclass_from_dict(d, contained_type) 272 | 273 | cls = get_origin(typeannot) or typeannot 274 | if issubclass(cls, tuple) and hasattr(cls, "_fields"): # namedtuple 275 | types = cls._field_types.values() 276 | return cls(*[_dataclass_from_dict(v, tp) for v, tp in zip(d, types)]) 277 | elif issubclass(cls, (list, tuple)): 278 | types = get_args(typeannot) 279 | if len(types) == 1: # probably List; replicate for all items 280 | types = types * len(d) 281 | return cls(_dataclass_from_dict(v, tp) for v, tp in zip(d, types)) 282 | elif issubclass(cls, dict): 283 | key_t, val_t = get_args(typeannot) 284 | return cls( 285 | (_dataclass_from_dict(k, key_t), _dataclass_from_dict(v, val_t)) 286 | for k, v in d.items() 287 | ) 288 | elif not dataclasses.is_dataclass(typeannot): 289 | return d 290 | 291 | assert dataclasses.is_dataclass(cls) 292 | fieldtypes = {f.name: _unwrap_type(f.type) for f in dataclasses.fields(typeannot)} 293 | return cls(**{k: _dataclass_from_dict(v, fieldtypes[k]) for k, v in d.items()}) 294 | 295 | 296 | def _unwrap_type(tp): 297 | # strips Optional wrapper, if any 298 | if get_origin(tp) is Union: 299 | args = get_args(tp) 300 | if len(args) == 2 and any(a is type(None) for a in args): # noqa: E721 301 | # this is typing.Optional 302 | return args[0] if args[1] is type(None) else args[1] # noqa: E721 303 | return tp 304 | 305 | 306 | def _get_dataclass_field_default(field: Field) -> Any: 307 | if field.default_factory is not MISSING: 308 | # pyre-fixme[29]: `Union[dataclasses._MISSING_TYPE, 309 | # dataclasses._DefaultFactory[typing.Any]]` is not a function. 310 | return field.default_factory() 311 | elif field.default is not MISSING: 312 | return field.default 313 | else: 314 | return None 315 | 316 | 317 | def _asdict_rec(obj): 318 | return dataclasses._asdict_inner(obj, dict) 319 | 320 | 321 | def dump_dataclass_jgzip(outfile: str, obj: Any) -> None: 322 | """ 323 | Dumps obj to a gzipped json outfile. 324 | 325 | Args: 326 | obj: A @dataclass or collection hiererchy including dataclasses. 327 | outfile: The path to the output file. 328 | """ 329 | with gzip.GzipFile(outfile, "wb") as f: 330 | dump_dataclass(obj, cast(IO, f), binary=True) 331 | 332 | 333 | def load_dataclass_jgzip(outfile, cls): 334 | """ 335 | Loads a dataclass from a gzipped json outfile. 336 | 337 | Args: 338 | outfile: The path to the loaded file. 339 | cls: The type annotation of the loaded dataclass. 340 | 341 | Returns: 342 | loaded_dataclass: The loaded dataclass. 343 | """ 344 | with gzip.GzipFile(outfile, "rb") as f: 345 | return load_dataclass(cast(IO, f), cls, binary=True) 346 | 347 | 348 | def _resolve_optional(type_: Any) -> Tuple[bool, Any]: 349 | """Check whether `type_` is equivalent to `typing.Optional[T]` for some T.""" 350 | if get_origin(type_) is Union: 351 | args = get_args(type_) 352 | if len(args) == 2 and args[1] == type(None): # noqa E721 353 | return True, args[0] 354 | if type_ is Any: 355 | return True, Any 356 | 357 | return False, type_ 358 | -------------------------------------------------------------------------------- /co3d/dataset/download_dataset_impl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
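A minimal usage sketch for the (de)serialization helpers in `data_types.py` above; the dataset path is hypothetical, and `frame_annotations.jgz` is the usual per-category annotation archive in CO3Dv2:

```
# Sketch only: read a per-category frame annotation file with load_dataclass_jgzip.
# The dataset location is hypothetical; adjust it to your local CO3Dv2 copy.
from typing import List
from co3d.dataset.data_types import FrameAnnotation, load_dataclass_jgzip

frame_annos = load_dataclass_jgzip(
    "/datasets/co3d_v2/apple/frame_annotations.jgz",  # hypothetical path
    List[FrameAnnotation],
)
print(f"loaded {len(frame_annos)} frame annotations")
first = frame_annos[0]
print(first.sequence_name, first.frame_number, first.image.path, first.image.size)
```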
6 | 
7 | 
8 | import os
9 | import shutil
10 | import requests
11 | import functools
12 | import json
13 | import warnings
14 | 
15 | from argparse import ArgumentParser
16 | from typing import List, Optional
17 | from multiprocessing import Pool
18 | from tqdm import tqdm
19 | 
20 | from .check_checksum import check_co3d_sha256
21 | 
22 | 
23 | def download_dataset(
24 |     link_list_file: str,
25 |     download_folder: str,
26 |     n_download_workers: int = 4,
27 |     n_extract_workers: int = 4,
28 |     download_categories: Optional[List[str]] = None,
29 |     checksum_check: bool = False,
30 |     single_sequence_subset: bool = False,
31 |     clear_archives_after_unpacking: bool = False,
32 |     skip_downloaded_archives: bool = True,
33 |     sha256s_file: Optional[str] = None,
34 | ):
35 |     """
36 |     Downloads and unpacks the dataset in CO3D format.
37 | 
38 |     Note: The script will make a folder `_in_progress` inside `download_folder`,
39 |         which stores files whose download is in progress. The folder can be
40 |         safely deleted once the download is finished.
41 | 
42 |     Args:
43 |         link_list_file: A text file with the list of zip file download links.
44 |         download_folder: A local target folder for downloading the
45 |             dataset files.
46 |         n_download_workers: The number of parallel workers
47 |             for downloading the dataset files.
48 |         n_extract_workers: The number of parallel workers
49 |             for extracting the dataset files.
50 |         download_categories: A list of categories to download.
51 |             If `None`, downloads all.
52 |         checksum_check: Enable validation of the downloaded file's checksum before
53 |             extraction.
54 |         single_sequence_subset: Whether the downloaded dataset is the single-sequence
55 |             subset of the full dataset.
56 |         clear_archives_after_unpacking: Delete the unnecessary downloaded archive files
57 |             after unpacking.
58 |         skip_downloaded_archives: Skip re-downloading already downloaded archives.
59 |     """
60 | 
61 |     if checksum_check and not sha256s_file:
62 |         raise ValueError(
63 |             "checksum_check is requested but ground-truth SHA256 file not provided!"
64 |         )
65 | 
66 |     if not os.path.isfile(link_list_file):
67 |         raise ValueError(
68 |             "Please specify `link_list_file` with a valid path to a json"
69 |             " with zip file download links."
70 |             " For CO3Dv2, the file is stored in the co3d github:"
71 |             " https://github.com/facebookresearch/co3d/blob/main/co3d/links.json"
72 |         )
73 | 
74 |     if not os.path.isdir(download_folder):
75 |         raise ValueError(
76 |             "Please specify `download_folder` with a valid path to a target folder"
77 |             + " for downloading the dataset."
78 |             + f" {download_folder} does not exist."
79 |         )
80 | 
81 |     # read the link file
82 |     with open(link_list_file, "r") as f:
83 |         links = json.load(f)
84 | 
85 |     # get the full dataset links or the single-sequence subset links
86 |     links = links["singlesequence"] if single_sequence_subset else links["full"]
87 | 
88 |     # split to data links and the links containing json metadata
89 |     metadata_links = []
90 |     data_links = []
91 |     for category_name, urls in links.items():
92 |         for url in urls:
93 |             link_name = os.path.split(url)[-1]
94 |             if single_sequence_subset:
95 |                 link_name = link_name.replace("_singlesequence", "")
96 |             if category_name.upper() == "METADATA":
97 |                 metadata_links.append((link_name, url))
98 |             else:
99 |                 data_links.append((category_name, link_name, url))
100 | 
101 |     if download_categories is not None:
102 |         co3d_categories = set(l[0] for l in data_links)
103 |         not_in_co3d = [c for c in download_categories if c not in co3d_categories]
104 |         if len(not_in_co3d) > 0:
105 |             raise ValueError(
106 |                 f"download_categories {str(not_in_co3d)} are not valid"
107 |                 + " dataset categories."
108 |             )
109 |         data_links = [(c, ln, l) for c, ln, l in data_links if c in download_categories]
110 | 
111 |     with Pool(processes=n_download_workers) as download_pool:
112 |         print(f"Downloading {len(metadata_links)} dataset metadata files ...")
113 |         for _ in tqdm(
114 |             download_pool.imap(
115 |                 functools.partial(_download_metadata_file, download_folder),
116 |                 metadata_links,
117 |             ),
118 |             total=len(metadata_links),
119 |         ):
120 |             pass
121 | 
122 |         print(f"Downloading {len(data_links)} dataset files ...")
123 |         download_ok = {}
124 |         for link_name, ok in tqdm(
125 |             download_pool.imap(
126 |                 functools.partial(
127 |                     _download_category_file,
128 |                     download_folder,
129 |                     checksum_check,
130 |                     single_sequence_subset,
131 |                     sha256s_file,
132 |                     skip_downloaded_archives,
133 |                 ),
134 |                 data_links,
135 |             ),
136 |             total=len(data_links),
137 |         ):
138 |             download_ok[link_name] = ok
139 | 
140 |     if not all(download_ok.values()):
141 |         not_ok_links = [n for n, ok in download_ok.items() if not ok]
142 |         not_ok_links_str = "\n".join(not_ok_links)
143 |         raise AssertionError(
144 |             "The SHA256 checksums did not match for some of the downloaded files:\n"
145 |             + not_ok_links_str + "\n"
146 |             + "This is most likely due to a network failure."
147 |             + " Please restart the download script."
148 | ) 149 | 150 | metadata_links = [ml for ml in metadata_links if ml[1].endswith(".zip")] 151 | print(f"Extracting {len(data_links)} dataset files and {len(metadata_links)} metadata files...") 152 | with Pool(processes=n_extract_workers) as extract_pool: 153 | for _ in tqdm( 154 | extract_pool.imap( 155 | functools.partial( 156 | _unpack_category_file, 157 | download_folder, 158 | clear_archives_after_unpacking, 159 | ), 160 | metadata_links + data_links, 161 | ), 162 | total=len(metadata_links) + len(data_links), 163 | ): 164 | pass 165 | 166 | print("Done") 167 | 168 | 169 | def build_arg_parser( 170 | dataset_name: str, 171 | default_link_list_file: str, 172 | default_sha256_file: str, 173 | ) -> ArgumentParser: 174 | parser = ArgumentParser(description=f"Download the {dataset_name} dataset.") 175 | parser.add_argument( 176 | "--download_folder", 177 | type=str, 178 | required=True, 179 | help="A local target folder for downloading the the dataset files.", 180 | ) 181 | parser.add_argument( 182 | "--n_download_workers", 183 | type=int, 184 | default=4, 185 | help="The number of parallel workers for downloading the dataset files.", 186 | ) 187 | parser.add_argument( 188 | "--n_extract_workers", 189 | type=int, 190 | default=4, 191 | help="The number of parallel workers for extracting the dataset files.", 192 | ) 193 | parser.add_argument( 194 | "--download_categories", 195 | type=lambda x: [x_.strip() for x_ in x.split(",")], 196 | default=None, 197 | help=f"A comma-separated list of {dataset_name} categories to download." 198 | + " Example: 'orange,car' will download only oranges and cars", 199 | ) 200 | parser.add_argument( 201 | "--link_list_file", 202 | type=str, 203 | default=default_link_list_file, 204 | help=( 205 | f"The file with html links to the {dataset_name} dataset files." 206 | + " In most cases the default local file `links.json` should be used." 207 | ), 208 | ) 209 | parser.add_argument( 210 | "--sha256_file", 211 | type=str, 212 | default=default_sha256_file, 213 | help=( 214 | f"The file with SHA256 hashes of {dataset_name} dataset files." 215 | + " In most cases the default local file `co3d_sha256.json` should be used." 
216 | ), 217 | ) 218 | parser.add_argument( 219 | "--checksum_check", 220 | action="store_true", 221 | default=True, 222 | help="Check the SHA256 checksum of each downloaded file before extraction.", 223 | ) 224 | parser.add_argument( 225 | "--no_checksum_check", 226 | action="store_false", 227 | dest="checksum_check", 228 | default=False, 229 | help="Does not check the SHA256 checksum of each downloaded file before extraction.", 230 | ) 231 | parser.set_defaults(checksum_check=True) 232 | parser.add_argument( 233 | "--clear_archives_after_unpacking", 234 | action="store_true", 235 | default=False, 236 | help="Delete the unnecessary downloaded archive files after unpacking.", 237 | ) 238 | parser.add_argument( 239 | "--redownload_existing_archives", 240 | action="store_true", 241 | default=False, 242 | help="Redownload the already-downloaded archives.", 243 | ) 244 | 245 | return parser 246 | 247 | 248 | def _unpack_category_file( 249 | download_folder: str, 250 | clear_archive: bool, 251 | link: str, 252 | ): 253 | *_, link_name, url = link 254 | local_fl = os.path.join(download_folder, link_name) 255 | print(f"Unpacking dataset file {local_fl} ({link_name}) to {download_folder}.") 256 | shutil.unpack_archive(local_fl, download_folder) 257 | if clear_archive: 258 | os.remove(local_fl) 259 | 260 | 261 | def _download_category_file( 262 | download_folder: str, 263 | checksum_check: bool, 264 | single_sequence_subset: bool, 265 | sha256s_file: Optional[str], 266 | skip_downloaded_files: bool, 267 | link: str, 268 | ): 269 | category, link_name, url = link 270 | local_fl_final = os.path.join(download_folder, link_name) 271 | 272 | if skip_downloaded_files and os.path.isfile(local_fl_final): 273 | print(f"Skipping {local_fl_final}, already downloaded!") 274 | return link_name, True 275 | 276 | in_progress_folder = os.path.join(download_folder, "_in_progress") 277 | os.makedirs(in_progress_folder, exist_ok=True) 278 | local_fl = os.path.join(in_progress_folder, link_name) 279 | 280 | print(f"Downloading dataset file {link_name} ({url}) to {local_fl}.") 281 | _download_with_progress_bar(url, local_fl, link_name) 282 | if checksum_check: 283 | print(f"Checking SHA256 for {local_fl}.") 284 | try: 285 | check_co3d_sha256( 286 | local_fl, 287 | sha256s_file=sha256s_file, 288 | single_sequence_subset=single_sequence_subset, 289 | ) 290 | except AssertionError: 291 | warnings.warn( 292 | f"Checksums for {local_fl} did not match!" 293 | + " This is likely due to a network failure," 294 | + " please restart the download script." 295 | ) 296 | return link_name, False 297 | 298 | os.rename(local_fl, local_fl_final) 299 | return link_name, True 300 | 301 | 302 | def _download_metadata_file(download_folder: str, link: str): 303 | local_fl = os.path.join(download_folder, link[0]) 304 | # remove the singlesequence postfix in case we are downloading the s.s. 
subset 305 | local_fl = local_fl.replace("_singlesequence", "") 306 | print(f"Downloading dataset metadata file {link[1]} ({link[0]}) to {local_fl}.") 307 | _download_with_progress_bar(link[1], local_fl, link[0]) 308 | 309 | 310 | def _download_with_progress_bar(url: str, fname: str, filename: str): 311 | # taken from https://stackoverflow.com/a/62113293/986477 312 | resp = requests.get(url, stream=True) 313 | print(url) 314 | total = int(resp.headers.get("content-length", 0)) 315 | with open(fname, "wb") as file, tqdm( 316 | desc=fname, 317 | total=total, 318 | unit="iB", 319 | unit_scale=True, 320 | unit_divisor=1024, 321 | ) as bar: 322 | for datai, data in enumerate(resp.iter_content(chunk_size=1024)): 323 | size = file.write(data) 324 | bar.update(size) 325 | if datai % max((max(total // 1024, 1) // 20), 1) == 0: 326 | print(f"{filename}: Downloaded {100.0*(float(bar.n)/max(total, 1)):3.1f}%.") 327 | print(bar) 328 | -------------------------------------------------------------------------------- /co3d/dataset/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import torch 9 | import copy 10 | from pytorch3d.implicitron.dataset.dataset_base import FrameData 11 | from co3d.challenge.data_types import CO3DTask, CO3DSequenceSet 12 | 13 | 14 | def redact_eval_frame_data(fd: FrameData) -> FrameData: 15 | """ 16 | Redact all information about the test element (1st image) 17 | of the evaluation frame data `fd`. 18 | 19 | This is done by zeroing all elements of the relevant tensors in `fd` 20 | followed by removing the sequence_point_cloud field. 21 | """ 22 | fd_redacted = copy.deepcopy(fd) 23 | for redact_field_name in [ 24 | "fg_probability", 25 | "image_rgb", 26 | "depth_map", 27 | "mask_crop", 28 | ]: 29 | # zero-out all elements in the redacted tensor 30 | field_val = getattr(fd, redact_field_name) 31 | field_val[:1] *= 0 32 | # also remove the point cloud info 33 | fd_redacted.sequence_point_cloud_idx = None 34 | fd_redacted.sequence_point_cloud = None 35 | return fd_redacted 36 | 37 | 38 | def _check_valid_eval_frame_data( 39 | fd: FrameData, 40 | task: CO3DTask, 41 | sequence_set: CO3DSequenceSet, 42 | ): 43 | """ 44 | Check that the evaluation batch `fd` is redacted correctly. 45 | """ 46 | is_redacted = torch.stack( 47 | [ 48 | getattr(fd, k).abs().sum((1,2,3)) <= 0 49 | for k in ["image_rgb", "depth_map", "fg_probability"] 50 | ] 51 | ) 52 | if sequence_set==CO3DSequenceSet.TEST: 53 | # first image has to be redacted 54 | assert is_redacted[:, 0].all() 55 | # all depth maps have to be redacted 56 | assert is_redacted[1, :].all() 57 | # no known views should be redacted 58 | assert not is_redacted[:, 1:].all(dim=0).any() 59 | elif sequence_set==CO3DSequenceSet.DEV: 60 | # nothing should be redacted 61 | assert not is_redacted.all(dim=0).any() 62 | else: 63 | raise ValueError(sequence_set) -------------------------------------------------------------------------------- /co3d/download_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
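`download_dataset()` above can also be driven programmatically rather than through the `download_dataset.py` CLI wrapper that follows; a minimal sketch, assuming the target folder already exists and using the `links.json` / `co3d_sha256.json` files shipped with the repository:

```
# Sketch only: programmatic download of two categories with checksum validation.
# The download folder is hypothetical and must be an existing directory.
from co3d.dataset.download_dataset_impl import download_dataset

download_dataset(
    link_list_file="co3d/links.json",
    download_folder="/big_disk/co3d_v2",
    n_download_workers=4,
    n_extract_workers=4,
    download_categories=["apple", "orange"],   # None downloads every category
    checksum_check=True,
    sha256s_file="co3d/co3d_sha256.json",      # required when checksum_check=True
    single_sequence_subset=False,
    clear_archives_after_unpacking=True,
)
```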
6 | 7 | 8 | import os 9 | 10 | from dataset.download_dataset_impl import build_arg_parser, download_dataset 11 | 12 | 13 | DEFAULT_LINK_LIST_FILE = os.path.join(os.path.dirname(__file__), "links.json") 14 | DEFAULT_SHA256S_FILE = os.path.join(os.path.dirname(__file__), "co3d_sha256.json") 15 | 16 | 17 | if __name__ == "__main__": 18 | parser = build_arg_parser("CO3D", DEFAULT_LINK_LIST_FILE, DEFAULT_SHA256S_FILE) 19 | parser.add_argument( 20 | "--single_sequence_subset", 21 | action="store_true", 22 | default=False, 23 | help="Download the single-sequence subset of the dataset.", 24 | ) 25 | args = parser.parse_args() 26 | download_dataset( 27 | str(args.link_list_file), 28 | str(args.download_folder), 29 | n_download_workers=int(args.n_download_workers), 30 | n_extract_workers=int(args.n_extract_workers), 31 | download_categories=args.download_categories, 32 | checksum_check=bool(args.checksum_check), 33 | single_sequence_subset=bool(args.single_sequence_subset), 34 | clear_archives_after_unpacking=bool(args.clear_archives_after_unpacking), 35 | sha256s_file=str(args.sha256_file), 36 | skip_downloaded_archives=not bool(args.redownload_existing_archives), 37 | ) 38 | -------------------------------------------------------------------------------- /co3d/utils/dbir_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import dataclasses 9 | import torch 10 | from typing import Tuple 11 | from pytorch3d.renderer.cameras import CamerasBase 12 | from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset 13 | from pytorch3d.implicitron.dataset.dataset_base import FrameData 14 | from pytorch3d.structures import Pointclouds 15 | from pytorch3d.implicitron.dataset.json_index_dataset import _get_clamp_bbox 16 | from pytorch3d.implicitron.models.base_model import ImplicitronRender 17 | from pytorch3d.implicitron.dataset.visualize import get_implicitron_sequence_pointcloud 18 | from pytorch3d.implicitron.tools.point_cloud_utils import ( 19 | render_point_cloud_pytorch3d, 20 | get_rgbd_point_cloud, 21 | ) 22 | 23 | 24 | def render_point_cloud( 25 | camera: CamerasBase, 26 | render_size: Tuple[int, int], 27 | pointcloud: Pointclouds, 28 | point_radius: float = 0.03, 29 | ) -> ImplicitronRender: 30 | """ 31 | Render the point cloud `pointcloud` to the camera `camera` using the 32 | PyTorch3D point cloud renderer. 33 | 34 | Args: 35 | camera: Rendering camera. 36 | render_size: 2-tuple of integers denoting the render size (HxW) 37 | pointcloud: The point cloud to render. 38 | point_radius: Radius of the rendered points. 39 | """ 40 | # render the sequence point cloud to each evaluation view 41 | data_rendered, render_mask, depth_rendered = render_point_cloud_pytorch3d( 42 | camera, 43 | pointcloud, 44 | render_size=render_size, 45 | point_radius=point_radius, 46 | topk=10, 47 | eps=1e-2, 48 | bin_size=0, 49 | ) 50 | 51 | # cast to the implicitron render 52 | return ImplicitronRender( 53 | depth_render=depth_rendered, 54 | image_render=data_rendered, 55 | mask_render=render_mask, 56 | ) 57 | 58 | 59 | def paste_render_to_original_image( 60 | frame_data: FrameData, 61 | render: ImplicitronRender, 62 | ) -> ImplicitronRender: 63 | """ 64 | Paste a rendering result `render` into the original image coordinate frame. 
65 | 66 | Args: 67 | frame_data: The `FrameData` object as returned by the `JsonIndexDataset`. 68 | render: A render to be pasted into the original image coordinates. 69 | """ 70 | # size of the render 71 | render_size = render.image_render.shape[2:] 72 | 73 | # estimate render scale w.r.t. the frame_data images 74 | render_scale_factors = [ 75 | sr / s for sr, s in zip(render_size, frame_data.image_rgb.shape[2:]) 76 | ] 77 | assert abs(render_scale_factors[0]-render_scale_factors[1]) <= 1e-2, ( 78 | "non-isotropic render rescale" 79 | ) 80 | 81 | # original image size 82 | orig_size = frame_data.image_size_hw[0].tolist() 83 | 84 | # bounding box of the crop in the original image 85 | if frame_data.crop_bbox_xywh is not None: 86 | bbox_xywh = frame_data.crop_bbox_xywh[0] 87 | else: 88 | bbox_xywh = torch.LongTensor([0, 0, orig_size[1], orig_size[0]]) 89 | 90 | # get the valid part of the render 91 | render_bounds_wh = [None, None] 92 | for axis in [0, 1]: 93 | # resize the mask crop to the size of the render 94 | if render_size != frame_data.mask_crop.shape[2:]: 95 | mask_crop_render_size = torch.nn.functional.interpolate( 96 | frame_data.mask_crop, size=render_size, mode="nearest" 97 | ) 98 | else: 99 | mask_crop_render_size = frame_data.mask_crop 100 | # get the bounds of the mask_crop along dimemsion = 1-axis 101 | valid_dim_pix = mask_crop_render_size[0, 0].sum(dim=axis).reshape(-1).nonzero() 102 | assert valid_dim_pix.min()==0 103 | render_bounds_wh[axis] = valid_dim_pix.max().item() + 1 104 | 105 | render_out = {} 106 | for render_type, render_val in dataclasses.asdict(render).items(): 107 | if render_val is None: 108 | continue 109 | # get the valid part of the render 110 | render_valid_ = render_val[..., :render_bounds_wh[1], :render_bounds_wh[0]] 111 | 112 | # resize the valid part to the original size 113 | render_resize_ = torch.nn.functional.interpolate( 114 | render_valid_, 115 | size=tuple(reversed(bbox_xywh[2:].tolist())), 116 | mode="bilinear" if render_type=="image_render" else "nearest", 117 | align_corners=False if render_type=="image_render" else None, 118 | ) 119 | 120 | # paste the original-sized crop to the original image 121 | render_pasted_ = render_resize_.new_zeros(1, render_resize_.shape[1], *orig_size) 122 | render_pasted_[ 123 | ..., 124 | bbox_xywh[1]:(bbox_xywh[1]+render_resize_.shape[2]), 125 | bbox_xywh[0]:(bbox_xywh[0]+render_resize_.shape[3]), 126 | ] = render_resize_ 127 | render_out[render_type] = render_pasted_ 128 | 129 | # if True: 130 | # # debug visualize 131 | # from visdom import Visdom 132 | # viz = Visdom() 133 | # visdom_env = "debug_paste_render_to_original_image" 134 | # viz.image( 135 | # render.image_render[0], 136 | # env=visdom_env, 137 | # win="original", 138 | # ) 139 | # viz.image( 140 | # render_out["image_render"][0], 141 | # env=visdom_env, 142 | # win="pasted", 143 | # ) 144 | # import pdb; pdb.set_trace() 145 | # pass 146 | 147 | return ImplicitronRender(**render_out) 148 | 149 | 150 | def get_sequence_pointcloud( 151 | dataset: JsonIndexDataset, 152 | sequence_name: str, 153 | num_workers: int = 12, 154 | max_loaded_frames: int = 50, 155 | max_n_points: int = int(1e5), 156 | seed: int = 42, 157 | load_dataset_pointcloud: bool = False, 158 | ) -> Pointclouds: 159 | """ 160 | Given a `dataset` object and the name of a sequence in it (`sequence_name`), 161 | generate a 3D pointcloud containing the main foreground object of the scene. 162 | 163 | Args: 164 | dataset: A dataset of containing sequence annotations. 
165 | sequence_name: The name of the sequence to reconstruct. 166 | num_workers: Number of cores to use for loading the sequence data. 167 | max_n_points: Maximum number of points to keep in the point cloud. 168 | seed: Random seed for reproducibility. 169 | load_dataset_pointcloud: If `True` uses the CO3D ground truth dataset 170 | point cloud, otherwise generates the point cloud by unprojecting 171 | the depth maps of known frames. 172 | """ 173 | with torch.random.fork_rng(): # fork rng for reproducibility 174 | torch.manual_seed(seed) 175 | sequence_pointcloud, _ = get_implicitron_sequence_pointcloud( 176 | dataset, 177 | sequence_name, 178 | mask_points=True, 179 | max_frames=max_loaded_frames, 180 | num_workers=num_workers, 181 | load_dataset_point_cloud=load_dataset_pointcloud, 182 | ) 183 | sequence_pointcloud = _subsample_pointcloud(sequence_pointcloud, max_n_points) 184 | return sequence_pointcloud 185 | 186 | 187 | def get_eval_frame_data_pointcloud( 188 | eval_frame_data: FrameData, 189 | max_n_points: int = int(3e4), 190 | ): 191 | """ 192 | Generate a pointcloud by unprojecting the known depth maps of a `FrameData` object 193 | `eval_frame_data`. 194 | 195 | Args: 196 | eval_frame_data: `FrameData` to unproject. 197 | max_n_points: Maximum number of points to keep in the point cloud. 198 | """ 199 | batch_size = eval_frame_data.image_rgb.shape[0] 200 | pointcloud = get_rgbd_point_cloud( 201 | eval_frame_data.camera[list(range(1, batch_size))], 202 | eval_frame_data.image_rgb[1:], 203 | eval_frame_data.depth_map[1:], 204 | (eval_frame_data.fg_probability[1:] > 0.5).float(), 205 | mask_points=True, 206 | ) 207 | return _subsample_pointcloud(pointcloud, max_n_points) 208 | 209 | 210 | def _subsample_pointcloud(p: Pointclouds, n: int): 211 | n_points = p.num_points_per_cloud().item() 212 | if n_points > n: 213 | # subsample the point cloud in case it is bigger than max_n_points 214 | subsample_idx = torch.randperm( 215 | n_points, 216 | device=p.points_padded().device, 217 | )[:n] 218 | p = Pointclouds( 219 | points=p.points_padded()[:, subsample_idx], 220 | features=p.features_padded()[:, subsample_idx], 221 | ) 222 | return p 223 | -------------------------------------------------------------------------------- /co3d/utils/evaluate_implicitron_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | Evaluation of Implicitron models on CO3Dv2 challenge. 
9 | """ 10 | 11 | 12 | import logging 13 | import os 14 | import torch 15 | import json 16 | import warnings 17 | from typing import Optional, Union, Dict, Tuple 18 | from tqdm import tqdm 19 | from omegaconf import DictConfig, OmegaConf 20 | import numpy as np 21 | 22 | import pytorch3d 23 | from pytorch3d.implicitron.models.generic_model import ImplicitronRender, GenericModel 24 | from pytorch3d.implicitron.tools.config import get_default_args 25 | from pytorch3d.implicitron.dataset.dataset_base import FrameData 26 | from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap 27 | from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import ( 28 | JsonIndexDatasetMapProviderV2 29 | ) 30 | from pytorch3d.implicitron.tools.config import expand_args_fields 31 | from pytorch3d.implicitron.tools.model_io import ( 32 | parse_epoch_from_model_path, 33 | find_last_checkpoint, 34 | ) 35 | from pytorch3d.implicitron.models.renderer.base import ( 36 | # BaseRenderer, 37 | EvaluationMode, 38 | # ImplicitFunctionWrapper, 39 | # RendererOutput, 40 | # RenderSamplingMode, 41 | ) 42 | 43 | 44 | from co3d.utils import dbir_utils 45 | from co3d.challenge.co3d_submission import CO3DSubmission 46 | from co3d.challenge.data_types import CO3DTask, CO3DSequenceSet 47 | from co3d.challenge.utils import ( 48 | get_co3d_task_from_subset_name, 49 | get_co3d_sequence_set_from_subset_name, 50 | ) 51 | from co3d.dataset.utils import redact_eval_frame_data, _check_valid_eval_frame_data 52 | from co3d.challenge.metric_utils import EVAL_METRIC_NAMES 53 | 54 | 55 | DATASET_ROOT = os.getenv("CO3DV2_DATASET_ROOT") 56 | DATASET_ROOT_HIDDEN = os.getenv("CO3DV2_HIDDEN_DATASET_ROOT") 57 | 58 | 59 | # HACK: implicitron_trainer is not part of a package; forcing it in the path 60 | _pytorch3d_root = os.path.dirname(os.path.dirname(pytorch3d.__file__)) 61 | implicitron_trainer_dir = os.path.join(_pytorch3d_root, "projects", "implicitron_trainer") 62 | # sys.path.insert(0, implicitron_trainer_dir) 63 | from projects.implicitron_trainer.experiment import Experiment 64 | 65 | 66 | logger = logging.getLogger(__name__) 67 | 68 | 69 | def evaluate_implicitron_exp_dir_map( 70 | category_subset_implicitron_exp_dirs: Union[Dict[Tuple[str, str], str], str], 71 | task: CO3DTask, 72 | sequence_set: CO3DSequenceSet, 73 | submission_output_folder: str, 74 | num_eval_workers: int = 4, 75 | submit_to_eval_ai: bool = False, 76 | skip_evaluation: bool = False, 77 | fill_results_from_cache: bool = False, 78 | implicitron_exp_dir_submission_output_subfolder: Optional[str] = None, 79 | ): 80 | """ 81 | Evalulates and submits to EvalAI either: 82 | 1) all Implicitron class-specific models, or 83 | 2) a single model trained for all categories. 84 | 85 | Args: 86 | category_subset_implicitron_exp_dirs: Two options: 87 | 1) a dict {(category_name, subset_name): implicitron_exp_dir_path} containing 88 | a mapping from each CO3Dv2 category and subset to the path of the 89 | corresponding implicitron model exp dir. 90 | 2) a string containing the path to a single model used for reconstructing 91 | all categories. 92 | task: The co3d task - either CO3DTask.MANY_VIEW or CO3DTask.FEW_VIEW. 93 | sequence_set: The sequence set to evaluate on: 94 | CO3DSequenceSet.DEV for for the development set 95 | CO3DSequenceSet.TEST for for the test set 96 | submission_output_folder: Directory containing the submission output files. 97 | num_eval_workers: Number of processes that conduct evaluation. 
98 | submit_to_eval_ai: If `True`, will automatically submit the exported result 99 | archive to EvalAI using the CLI interface (needs to be installed with 100 | `pip install evalai`). This requires setting the EVAL_AI_PERSONAL_TOKEN 101 | environment variable to your personal EVAL_AI token. 102 | skip_evaluation: Skip the local evaluation. 103 | implicitron_exp_dir_submission_output_subfolder: 104 | If set to a string, loads precomputed results from 105 | ``` 106 | category_subset_implicitron_exp_dirs[(category, subset)] 107 | /implicitron_exp_dir_submission_output_subfolder 108 | ``` 109 | for each (category, subset). 110 | Such precomputed results are typically output by: 111 | ``` 112 | evaluate_implicitron_exp_dir( 113 | category_subset_implicitron_exp_dirs[(category, subset)], 114 | ... 115 | ) 116 | """ 117 | 118 | submission = CO3DSubmission( 119 | task=task, 120 | sequence_set=sequence_set, 121 | output_folder=submission_output_folder, 122 | dataset_root=DATASET_ROOT, 123 | ) 124 | 125 | if fill_results_from_cache: 126 | submission.fill_results_from_cache() 127 | 128 | else: 129 | 130 | if not isinstance(category_subset_implicitron_exp_dirs, str): 131 | # check that we have all models in case the we were given one model per 132 | # category/subset_name 133 | for category, subset_name in submission.get_eval_batches_map(): 134 | if (category, subset_name) not in category_subset_implicitron_exp_dirs: 135 | raise ValueError( 136 | f"Missing implicitron exp dir for {category}/{subset_name}." 137 | ) 138 | 139 | for category, subset_name in submission.get_eval_batches_map(): 140 | if isinstance(category_subset_implicitron_exp_dirs, str): 141 | # a single model that does it all 142 | current_implicitron_exp_dir = category_subset_implicitron_exp_dirs 143 | else: 144 | # subset-specific models 145 | current_implicitron_exp_dir = category_subset_implicitron_exp_dirs[ 146 | (category, subset_name) 147 | ] 148 | 149 | if implicitron_exp_dir_submission_output_subfolder is not None: 150 | submission.link_results_from_existing_output_folder( 151 | os.path.join( 152 | current_implicitron_exp_dir, 153 | implicitron_exp_dir_submission_output_subfolder, 154 | ) 155 | ) 156 | 157 | else: 158 | update_implicitron_submission_with_category_and_subset_predictions( 159 | submission=submission, 160 | implicitron_exp_dir=current_implicitron_exp_dir, 161 | dataset_root=DATASET_ROOT, 162 | category=category, 163 | subset_name=subset_name, 164 | n_known_frames_for_test=9 if task==CO3DTask.MANY_VIEW else 0, 165 | ) 166 | 167 | # Locally evaluate the submission in case we dont evaluate on the hidden test set. 168 | if sequence_set != CO3DSequenceSet.TEST and not skip_evaluation: 169 | submission.evaluate(num_workers=num_eval_workers) 170 | 171 | if submit_to_eval_ai: 172 | # Export the submission predictions for submition to the evaluation server. 173 | # This also validates completeness of the produced predictions. 174 | submission.export_results(validate_results=True) 175 | # submit the results to the EvalAI server. 
176 |         submission.submit_to_eval_ai()
177 | 
178 | 
179 | def evaluate_implicitron_exp_dir(
180 |     implicitron_exp_dir: str,
181 |     task: Optional[CO3DTask] = None,
182 |     sequence_set: Optional[CO3DSequenceSet] = None,
183 |     subset_name: Optional[str] = None,
184 |     category: Optional[str] = None,
185 |     result_dump_file: Optional[str] = None,
186 |     clear_submission_cache_before_evaluation: bool = False,
187 |     clear_submission_cache_after_evaluation: bool = False,
188 |     submission_output_folder: Optional[str] = None,
189 |     num_eval_workers: int = 4,
190 | ):
191 |     """
192 |     Run evaluation for an experiment directory of Implicitron.
193 |     Unless overridden by the user, this function automatically parses the
194 |     category / subset / task / sequence_set / dataset_root
195 |     from the implicitron experiment config stored in implicitron_exp_dir.
196 | 
197 |     Args:
198 |         implicitron_exp_dir: The directory of an Implicitron experiment.
199 |         task: The co3d task - either CO3DTask.MANY_VIEW or CO3DTask.FEW_VIEW.
200 |         sequence_set: The sequence set to evaluate on:
201 |             CO3DSequenceSet.DEV for the development set
202 |             CO3DSequenceSet.TEST for the test set
203 |         subset_name: The name of the CO3Dv2 subset.
204 |             E.g. "manyview_dev_0", "fewview_dev", ...
205 |         category: The name of the CO3Dv2 category to evaluate.
206 |         result_dump_file: Path to the json file with evaluation results.
207 |         clear_submission_cache_before_evaluation: Delete all previous intermediate
208 |             submission files before commencing the current evaluation run.
209 |         clear_submission_cache_after_evaluation: Delete all intermediate
210 |             submission files after the evaluation run.
211 |         submission_output_folder: The path to the folder with intermediate
212 |             submission files.
213 |         num_eval_workers: Number of processes that conduct evaluation.
214 | """ 215 | 216 | if result_dump_file is None: 217 | result_dump_file = os.path.join( 218 | implicitron_exp_dir, "results_challenge_eval.json" 219 | ) 220 | 221 | cfg = load_implicitron_config_from_exp_dir(implicitron_exp_dir) 222 | 223 | # assert few config settings 224 | assert ( 225 | cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type 226 | =="JsonIndexDatasetMapProviderV2" 227 | ) 228 | 229 | # read the category / subset / task / sequence_set / dataset_root from 230 | # the implicitron config 231 | dataset_provider_args = ( 232 | cfg 233 | .data_source_ImplicitronDataSource_args 234 | .dataset_map_provider_JsonIndexDatasetMapProviderV2_args 235 | ) 236 | if subset_name is None: 237 | subset_name = dataset_provider_args.subset_name 238 | if category is None: 239 | category = dataset_provider_args.category 240 | if task is None: 241 | task = get_co3d_task_from_subset_name(subset_name) 242 | if sequence_set is None: 243 | sequence_set = get_co3d_sequence_set_from_subset_name(subset_name) 244 | 245 | dataset_root = ( 246 | DATASET_ROOT 247 | if DATASET_ROOT is not None 248 | else dataset_provider_args.dataset_root 249 | ) 250 | 251 | logger.info( 252 | f"Evaluating Implicitron model on category {category}; subset {subset_name}" 253 | ) 254 | 255 | # the folder storing all predictions and results of the submission 256 | if submission_output_folder is None: 257 | submission_output_folder = get_default_implicitron_exp_dir_submission_output_folder( 258 | implicitron_exp_dir, 259 | task, 260 | sequence_set, 261 | ) 262 | 263 | # create the submission object 264 | submission = CO3DSubmission( 265 | task=task, 266 | sequence_set=sequence_set, 267 | output_folder=submission_output_folder, 268 | dataset_root=DATASET_ROOT, 269 | ) 270 | 271 | if task==CO3DTask.FEW_VIEW and submission.has_only_single_sequence_subset(): 272 | # if only a single-sequence dataset is downloaded, only the many-view task 273 | # is available 274 | raise ValueError( 275 | f"Cannot evaluate the few-view task in {sequence_set.value} when only the" 276 | " singlesequence subset of CO3D is present." 277 | ) 278 | 279 | if clear_submission_cache_before_evaluation: 280 | submission.clear_files() 281 | 282 | # Generate new views for all evaluation examples in category/subset_name. 283 | update_implicitron_submission_with_category_and_subset_predictions( 284 | submission=submission, 285 | implicitron_exp_dir=implicitron_exp_dir, 286 | dataset_root=dataset_root, 287 | category=category, 288 | subset_name=subset_name, 289 | n_known_frames_for_test=9 if task==CO3DTask.MANY_VIEW else 0, 290 | ) 291 | 292 | # Locally evaluate the submission in case we dont evaluate on the hidden test set. 293 | if sequence_set == CO3DSequenceSet.TEST: 294 | logger.warning("Cannot evaluate on the hidden test set. 
Skipping evaluation.")
295 |         category_subset_results = {m: 0.0 for m in EVAL_METRIC_NAMES}
296 |     else:
297 |         results = submission.evaluate(num_workers=num_eval_workers)
298 |         category_subset_results = results[(category, subset_name)][0]
299 | 
300 |     # add the eval epoch as well
301 |     category_subset_results["eval_epoch"] = parse_epoch_from_model_path(
302 |         find_last_checkpoint(implicitron_exp_dir)
303 |     )
304 | 
305 |     logger.info("Implicitron model results:")
306 |     logger.info(f"category={category} / subset_name={subset_name}")
307 |     print_category_subset_results(category_subset_results)
308 | 
309 |     if clear_submission_cache_after_evaluation:
310 |         submission.clear_files()
311 | 
312 |     logger.info(f"Dumping challenge eval results to {result_dump_file}.")
313 |     with open(result_dump_file, "w") as f:
314 |         json.dump(category_subset_results, f)
315 | 
316 |     return category_subset_results
317 | 
318 | 
319 | @torch.no_grad()
320 | def update_implicitron_submission_with_category_and_subset_predictions(
321 |     submission: CO3DSubmission,
322 |     implicitron_exp_dir: str,
323 |     dataset_root: str,
324 |     category: str,
325 |     subset_name: str,
326 |     num_workers: int = 12,
327 |     n_known_frames_for_test: int = 0,
328 | ):
329 |     """
330 |     Updates the CO3DSubmission object `submission` with predictions of an Implicitron
331 |     model extracted for a given category, and a dataset subset.
332 | 
333 |     Args:
334 |         submission: CO3DSubmission object.
335 |         implicitron_exp_dir: Implicitron experiment directory to load the model from.
336 |         dataset_root: Path to the root dataset folder containing CO3Dv2.
337 |         category: A CO3Dv2 category to evaluate.
338 |         subset_name: The name of the evaluation subset of the category.
339 |         num_workers: Number of processes to use for evaluation.
340 |         n_known_frames_for_test: The number of known frames to append to the test batches.
341 |     """
342 | 
343 |     logger.info(
344 |         "Running Implicitron new view synthesis "
345 |         f"on category '{category}' subset '{subset_name}'"
346 |     )
347 | 
348 |     # Get the evaluation device.
349 |     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
350 | 
351 |     # load the implicitron model
352 |     model = load_model_from_implicitron_exp_dir(implicitron_exp_dir)
353 | 
354 |     # Determine the sequence set and the task we are solving
355 |     sequence_set = submission.sequence_set
356 |     task = submission.task
357 | 
358 |     # Obtain the CO3Dv2 dataset map
359 |     dataset_map = get_dataset_map(
360 |         dataset_root,
361 |         category,
362 |         subset_name,
363 |         n_known_frames_for_test=n_known_frames_for_test,
364 |     )
365 | 
366 |     # The test dataloader simply iterates over test_dataset.eval_batches
367 |     # this is done by setting test_dataset.eval_batches as the batch sampler
368 |     test_dataset = dataset_map["test"]
369 |     eval_batches = test_dataset.get_eval_batches()
370 | 
371 |     test_dataloader = torch.utils.data.DataLoader(
372 |         test_dataset,
373 |         batch_sampler=eval_batches,
374 |         num_workers=num_workers,
375 |         collate_fn=FrameData.collate,
376 |     )
377 | 
378 |     # loop over eval examples
379 |     logger.info(
380 |         f"Rendering {len(test_dataloader)} test views for {category}/{subset_name}"
381 |     )
382 | 
383 |     if sequence_set==CO3DSequenceSet.TEST:
384 |         # the test set contains images with redacted foreground masks which cause
385 |         # the test dataloader to spam a warning message,
386 |         # we suppress this warning with the following line
387 |         warnings.filterwarnings("ignore", message="Empty masks_for_bbox.*")
388 | 
389 |     for eval_index, eval_frame_data in enumerate(tqdm(test_dataloader)):
390 |         # the first element of eval_frame_data is the actual evaluation image,
391 |         # the remaining elements are the known source images used for building
392 |         # the reconstruction (source images are present only for the few-view task)
393 | 
394 |         # move the eval data to the requested device
395 |         eval_frame_data = eval_frame_data.to(device)
396 | 
397 |         # sanity check that the eval frame data has correctly redacted entries
398 |         _check_valid_eval_frame_data(eval_frame_data, task, sequence_set)
399 | 
400 |         # Redact the frame data so we are sure we cannot use the data
401 |         # from the actual unobserved evaluation sample
402 |         eval_frame_data = redact_eval_frame_data(eval_frame_data)
403 | 
404 |         # Obtain the image render. In case dataset_test.box_crop==True,
405 |         # we need to paste the render back to the original image bounds.
406 | model_preds = model( 407 | **eval_frame_data, 408 | eval_mode=EvaluationMode.EVALUATION, 409 | ) 410 | render_crop = model_preds["implicitron_render"] 411 | 412 | # cut the valid part of the render and paste into the original image canvas 413 | render_full_image = dbir_utils.paste_render_to_original_image( 414 | eval_frame_data, render_crop 415 | ) 416 | 417 | # get the image, mask, depth as numpy arrays for the challenge submission 418 | image, mask, depth = [ 419 | getattr(render_full_image, f"{data_type}_render").cpu().numpy()[0] 420 | for data_type in ["image", "mask", "depth"] 421 | ] 422 | 423 | # clip the rendered image to [0, 1] range 424 | image = image.clip(0.0, 1.0) 425 | 426 | # add the results to the submission object 427 | submission.add_result( 428 | category=category, 429 | subset_name=subset_name, 430 | sequence_name=eval_frame_data.sequence_name[0], 431 | frame_number=int(eval_frame_data.frame_number[0]), 432 | image=image, 433 | mask=mask, 434 | depth=depth, 435 | ) 436 | 437 | # reset all warnings 438 | warnings.simplefilter("always") 439 | 440 | 441 | def get_default_implicitron_exp_dir_submission_output_folder( 442 | implicitron_exp_dir: str, 443 | task: CO3DTask, 444 | sequence_set: CO3DSequenceSet, 445 | ): 446 | return os.path.join( 447 | implicitron_exp_dir, 448 | f"implicitron_submission_output_{task.value}_{sequence_set.value}", 449 | ) 450 | 451 | 452 | def parse_co3d_challenge_settings_from_implicitron_exp_dir( 453 | implicitron_exp_dir: str 454 | ) -> Tuple[CO3DSequenceSet, CO3DTask, str, str]: 455 | """ 456 | Reads the config of an implicitron experiment stored in `implicitron_exp_dir` and 457 | returns the configuration of the corresponding challenge entry. 458 | 459 | Args: 460 | implicitron_exp_dir: The directory of an Implicitron experiment. 461 | Returns: 462 | sequence_set: CO3D sequence set of the experiment. 463 | task: The CO3D task of the experiment. 464 | category: The category of the experiment. 465 | subset_name: The name of the CO3D subset. 466 | """ 467 | 468 | cfg = load_implicitron_config_from_exp_dir(implicitron_exp_dir) 469 | dataset_provider_args = ( 470 | cfg 471 | .data_source_ImplicitronDataSource_args 472 | .dataset_map_provider_JsonIndexDatasetMapProviderV2_args 473 | ) 474 | subset_name = dataset_provider_args.subset_name 475 | category = dataset_provider_args.category 476 | task = get_co3d_task_from_subset_name(subset_name) 477 | sequence_set = get_co3d_sequence_set_from_subset_name(subset_name) 478 | return sequence_set, task, category, subset_name 479 | 480 | 481 | def load_implicitron_config_from_exp_dir(implicitron_exp_dir: str): 482 | cfg_filename = os.path.join(implicitron_exp_dir, "expconfig.yaml") 483 | cfg_load = OmegaConf.load(cfg_filename) 484 | cfg_default = get_default_args(Experiment) 485 | cfg = OmegaConf.merge(cfg_default, cfg_load) 486 | cfg.exp_dir = implicitron_exp_dir 487 | return cfg 488 | 489 | 490 | def load_model_from_implicitron_exp_dir(exp_dir: str) -> GenericModel: 491 | cfg = load_implicitron_config_from_exp_dir(exp_dir) 492 | experiment = Experiment(**cfg) 493 | experiment.model_factory.force_resume = True 494 | model = experiment.model_factory(accelerator=None, exp_dir=exp_dir) 495 | model.cuda() 496 | model.eval() 497 | return model 498 | 499 | 500 | def get_dataset_map( 501 | dataset_root: str, 502 | category: str, 503 | subset_name: str, 504 | n_known_frames_for_test: int = 0, 505 | ) -> DatasetMap: 506 | """ 507 | Obtain the dataset map that contains the train/val/test dataset objects. 
508 | """ 509 | expand_args_fields(JsonIndexDatasetMapProviderV2) 510 | dataset_map_provider = JsonIndexDatasetMapProviderV2( 511 | category=category, 512 | subset_name=subset_name, 513 | dataset_root=dataset_root, 514 | test_on_train=False, 515 | only_test_set=False, 516 | load_eval_batches=True, 517 | dataset_JsonIndexDataset_args=DictConfig({"remove_empty_masks": False}), 518 | n_known_frames_for_test=n_known_frames_for_test, 519 | ) 520 | return dataset_map_provider.get_dataset_map() 521 | 522 | 523 | def print_category_subset_results(category_subset_results: Dict[str, float]): 524 | for k, v in category_subset_results.items(): 525 | print(f"{k:20s}: {v:1.3f}") 526 | -------------------------------------------------------------------------------- /co3d_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/co3d_logo.png -------------------------------------------------------------------------------- /examples/example_co3d_challenge_submission.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import logging 9 | import os 10 | import torch 11 | import warnings 12 | from tqdm import tqdm 13 | from omegaconf import DictConfig 14 | 15 | 16 | from pytorch3d.implicitron.models.generic_model import ImplicitronRender 17 | from pytorch3d.implicitron.dataset.dataset_base import FrameData 18 | from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap 19 | from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import ( 20 | JsonIndexDatasetMapProviderV2 21 | ) 22 | from pytorch3d.implicitron.tools.config import expand_args_fields 23 | 24 | from co3d.utils import dbir_utils 25 | from co3d.challenge.co3d_submission import CO3DSubmission 26 | from co3d.challenge.data_types import CO3DTask, CO3DSequenceSet 27 | from co3d.dataset.utils import redact_eval_frame_data, _check_valid_eval_frame_data 28 | 29 | 30 | DATASET_ROOT = os.getenv("CO3DV2_DATASET_ROOT") 31 | DATASET_ROOT_HIDDEN = os.getenv("CO3DV2_HIDDEN_DATASET_ROOT") 32 | 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | 37 | def get_dataset_map( 38 | dataset_root: str, 39 | category: str, 40 | subset_name: str, 41 | ) -> DatasetMap: 42 | """ 43 | Obtain the dataset map that contains the train/val/test dataset objects. 
44 |     """
45 |     expand_args_fields(JsonIndexDatasetMapProviderV2)
46 |     dataset_map_provider = JsonIndexDatasetMapProviderV2(
47 |         category=category,
48 |         subset_name=subset_name,
49 |         dataset_root=dataset_root,
50 |         test_on_train=False,
51 |         only_test_set=False,
52 |         load_eval_batches=True,
53 |         dataset_JsonIndexDataset_args=DictConfig({"remove_empty_masks": False}),
54 |     )
55 |     return dataset_map_provider.get_dataset_map()
56 | 
57 | 
58 | @torch.no_grad()
59 | def update_dbir_submission_with_category_and_subset_predictions(
60 |     submission: CO3DSubmission,
61 |     dataset_root: str,
62 |     category: str,
63 |     subset_name: str,
64 |     num_workers: int = 12,
65 |     cheat_with_gt_data: bool = True,
66 |     load_dataset_pointcloud: bool = False,
67 |     point_radius: float = 0.01,
68 | ):
69 |     """
70 |     Updates the CO3DSubmission object `submission` with predictions of a DBIR
71 |     model extracted for a given category, and a dataset subset.
72 | 
73 |     Args:
74 |         submission: CO3DSubmission object.
75 |         dataset_root: Path to the root dataset folder containing CO3Dv2.
76 |         category: A CO3Dv2 category to evaluate.
77 |         subset_name: The name of the evaluation subset of the category.
78 |         num_workers: Number of processes to use for evaluation.
79 |         cheat_with_gt_data: If `True`, bypasses the DBIR stage and simply
80 |             uses the ground truth test data. This, of course, only works for the
81 |             development set which is not redacted.
82 |         load_dataset_pointcloud: If `True`, uses the ground truth dataset
83 |             pointclouds instead of unprojecting known views.
84 |         point_radius: The radius of the rendered points.
85 |     """
86 | 
87 |     logger.info(
88 |         "Running depth-based image rendering (DBIR) new view synthesis "
89 |         f"on category '{category}' subset '{subset_name}'"
90 |     )
91 | 
92 |     # Get the evaluation device.
93 |     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
94 | 
95 |     # Determine the sequence set and the task we are solving
96 |     sequence_set = submission.sequence_set
97 |     task = submission.task
98 | 
99 |     # Obtain the CO3Dv2 dataset map
100 |     dataset_map = get_dataset_map(dataset_root, category, subset_name)
101 | 
102 |     if task==CO3DTask.MANY_VIEW and not cheat_with_gt_data:
103 |         # Obtain the point cloud of the corresponding evaluation sequence
104 |         # by unprojecting depth maps of the known training views in the sequence:
105 |         train_dataset = dataset_map["train"]
106 |         sequence_name = train_dataset[0].sequence_name
107 |         sequence_pointcloud = dbir_utils.get_sequence_pointcloud(
108 |             train_dataset,
109 |             sequence_name,
110 |             load_dataset_pointcloud=load_dataset_pointcloud,
111 |         )
112 |         # Move the pointcloud to the right device
113 |         sequence_pointcloud = sequence_pointcloud.to(device)
114 | 
115 |     # The test dataloader simply iterates over test_dataset.eval_batches
116 |     # this is done by setting test_dataset.eval_batches as the batch sampler
117 |     test_dataset = dataset_map["test"]
118 |     test_dataloader = torch.utils.data.DataLoader(
119 |         test_dataset,
120 |         batch_sampler=test_dataset.eval_batches,
121 |         num_workers=num_workers,
122 |         collate_fn=FrameData.collate,
123 |     )
124 | 
125 |     # loop over eval examples
126 |     logger.info(
127 |         f"Rendering {len(test_dataloader)} test views for {category}/{subset_name}"
128 |     )
129 | 
130 |     if sequence_set==CO3DSequenceSet.TEST:
131 |         # the test set contains images with redacted foreground masks which cause
132 |         # the test dataloader to spam a warning message,
133 |         # we suppress this warning with the following line
134 |         warnings.filterwarnings("ignore", message="Empty masks_for_bbox.*")
135 | 
136 |     for eval_index, eval_frame_data in enumerate(tqdm(test_dataloader)):
137 |         # the first element of eval_frame_data is the actual evaluation image,
138 |         # the remaining elements are the known source images used for building
139 |         # the reconstruction (source images are present only for the few-view task)
140 | 
141 |         # move the eval data to the requested device
142 |         eval_frame_data = eval_frame_data.to(device)
143 | 
144 |         # sanity check that the eval frame data has correctly redacted entries
145 |         _check_valid_eval_frame_data(eval_frame_data, task, sequence_set)
146 | 
147 |         if cheat_with_gt_data:
148 |             # Cheat by taking the ground truth data. This should result in perfect metrics.
149 |             mask_render = (eval_frame_data.fg_probability[:1] > 0.5).float()
150 |             render_crop = ImplicitronRender(
151 |                 depth_render = eval_frame_data.depth_map[:1],
152 |                 image_render = eval_frame_data.image_rgb[:1] * mask_render,
153 |                 mask_render = mask_render,
154 |             )
155 | 
156 |         else:
157 |             if task==CO3DTask.MANY_VIEW:
158 |                 # we use the sequence pointcloud extracted above
159 |                 scene_pointcloud = sequence_pointcloud
160 |             elif task==CO3DTask.FEW_VIEW:
161 |                 # we build the pointcloud by unprojecting the depth maps of the known views
162 |                 # which are elements (1:end) of the eval batch
163 |                 scene_pointcloud = dbir_utils.get_eval_frame_data_pointcloud(
164 |                     eval_frame_data,
165 |                 )
166 |             else:
167 |                 raise ValueError(task)
168 |             # Redact the frame data so we are sure we cannot use the data
169 |             # from the actual unobserved evaluation sample
170 |             eval_frame_data = redact_eval_frame_data(eval_frame_data)
171 |             # Obtain the image render. In case dataset_test.box_crop==True,
172 |             # we need to paste the render back to the original image bounds.
173 | render_crop = dbir_utils.render_point_cloud( 174 | eval_frame_data.camera[[0]], 175 | eval_frame_data.image_rgb.shape[-2:], 176 | scene_pointcloud, 177 | point_radius=point_radius, 178 | ) 179 | 180 | # cut the valid part of the render and paste into the original image canvas 181 | render_full_image = dbir_utils.paste_render_to_original_image( 182 | eval_frame_data, render_crop 183 | ) 184 | 185 | # get the image, mask, depth as numpy arrays for the challenge submission 186 | image, mask, depth = [ 187 | getattr(render_full_image, f"{data_type}_render").cpu().numpy()[0] 188 | for data_type in ["image", "mask", "depth"] 189 | ] 190 | 191 | # add the results to the submission object 192 | submission.add_result( 193 | category=category, 194 | subset_name=subset_name, 195 | sequence_name=eval_frame_data.sequence_name[0], 196 | frame_number=int(eval_frame_data.frame_number[0]), 197 | image=image, 198 | mask=mask, 199 | depth=depth, 200 | ) 201 | 202 | # reset all warnings 203 | warnings.simplefilter("always") 204 | 205 | 206 | def make_dbir_submission( 207 | dataset_root = DATASET_ROOT, 208 | task = CO3DTask.MANY_VIEW, 209 | sequence_set = CO3DSequenceSet.DEV, 210 | clear_submission_files: bool = False, 211 | num_eval_workers: int = 4, 212 | cheat_with_gt_data: bool = False, 213 | fill_results_from_cache: bool = False, 214 | skip_evaluation: bool = False, 215 | submit_to_eval_ai: bool = False, 216 | ): 217 | """ 218 | Make a Depth-based-image-rendering (DBIR) submission for the CO3D Challenge. 219 | 220 | Args: 221 | dataset_root: Path to the root dataset folder. 222 | task: The CO3D task - either CO3DTask.MANY_VIEW or CO3DTask.FEW_VIEW. 223 | sequence_set: The sequence set to evaluate on: 224 | CO3DSequenceSet.DEV for the development set 225 | CO3DSequenceSet.TEST for the test set 226 | clear_submission_files: Delete all previous intermediate submission files before 227 | commencing the current submission run. 228 | num_eval_workers: Number of processes that conduct evaluation. 229 | cheat_with_gt_data: If `True`, bypasses the DBIR stage and simply 230 | uses the ground truth test data. This, of course, only works for the 231 | development set, which is not redacted. 232 | fill_results_from_cache: If `True`, skips running the DBIR model and instead 233 | loads the results exported from a previous run. 234 | skip_evaluation: If `True`, will not locally evaluate the predictions. 235 | submit_to_eval_ai: If `True`, will automatically submit the exported result 236 | archive to EvalAI using the CLI interface (needs to be installed with 237 | `pip install evalai`). This requires setting the EVAL_AI_PERSONAL_TOKEN 238 | environment variable to your personal EVAL_AI token.
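Example (an illustrative sketch only; a call similar to the one in the `__main__`
block at the bottom of this file, assuming the CO3Dv2 dataset has been downloaded
so that the default `dataset_root=DATASET_ROOT` is valid):
    make_dbir_submission(
        task=CO3DTask.MANY_VIEW,
        sequence_set=CO3DSequenceSet.DEV,
        clear_submission_files=True,
        cheat_with_gt_data=False,
        submit_to_eval_ai=False,
    )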
239 | """ 240 | # the folder storing all predictions and results of the submission 241 | submission_output_folder = os.path.join( 242 | os.path.split(os.path.abspath(__file__))[0], 243 | f"dbir_submission_output_{task.value}_{sequence_set.value}", 244 | ) 245 | 246 | if cheat_with_gt_data: 247 | # make sure that the cheated results have a cheater stamp in their name 248 | submission_output_folder += "_cheating" 249 | 250 | # create the submission object 251 | submission = CO3DSubmission( 252 | task=task, 253 | sequence_set=sequence_set, 254 | output_folder=submission_output_folder, 255 | dataset_root=DATASET_ROOT, 256 | ) 257 | 258 | if task==CO3DTask.FEW_VIEW and submission.has_only_single_sequence_subset(): 259 | # if only a single-sequence dataset is downloaded, only the many-view task 260 | # is available 261 | logger.warning( 262 | f"Cannot evaluate the few-view task in {sequence_set.value} when only the" 263 | " singlesequence subset of CO3D is present." 264 | ) 265 | return 266 | 267 | if fill_results_from_cache: 268 | # only take existing results 269 | submission.fill_results_from_cache() 270 | 271 | else: 272 | # Clear all files generated by potential previous submissions. 273 | # Hint: disable this in case you want to resume an evaluation. 274 | if clear_submission_files: 275 | submission.clear_files() 276 | 277 | # Get all category names and subset names for the selected task/sequence_set 278 | eval_batches_map = submission.get_eval_batches_map() 279 | 280 | # Iterate over the categories and the corresponding subset lists. 281 | for eval_i, (category, subset_name) in enumerate(eval_batches_map.keys()): 282 | logger.info( 283 | f"Evaluating category {category}; subset {subset_name}" 284 | + f" ({eval_i+1} / {len(eval_batches_map)})" 285 | ) 286 | 287 | # Generate new views for all evaluation examples in category/subset_name. 288 | update_dbir_submission_with_category_and_subset_predictions( 289 | submission=submission, 290 | dataset_root=dataset_root, 291 | category=category, 292 | subset_name=subset_name, 293 | cheat_with_gt_data=cheat_with_gt_data, 294 | ) 295 | 296 | # Locally evaluate the submission in case we dont evaluate on the hidden test set. 297 | if (not skip_evaluation and sequence_set != CO3DSequenceSet.TEST): 298 | submission.evaluate(num_workers=num_eval_workers) 299 | 300 | # Export the submission predictions for submition to the evaluation server. 301 | # This also validates completeness of the produced predictions. 302 | submission.export_results(validate_results=True) 303 | 304 | if submit_to_eval_ai: 305 | # submit the results to the EvalAI server. 
306 | submission.submit_to_eval_ai() 307 | 308 | # sanity check - reevaluate the archive file and compare results 309 | # submission_reeval = CO3DSubmission( 310 | # task=task, 311 | # sequence_set=sequence_set, 312 | # output_folder=os.path.join(submission_output_folder, "_reeval"), 313 | # dataset_root=DATASET_ROOT, 314 | # on_server=True, 315 | # server_data_folder=DATASET_ROOT_HIDDEN, 316 | # ) 317 | # submission_reeval.evaluate_archive_file( 318 | # submission.submission_archive, num_workers=num_eval_workers 319 | # ) 320 | 321 | 322 | if __name__ == "__main__": 323 | logging.basicConfig(level=logging.INFO) 324 | 325 | # iterate over all tasks and sequence sets 326 | for sequence_set in [CO3DSequenceSet.DEV, CO3DSequenceSet.TEST]: 327 | for task in [CO3DTask.MANY_VIEW, CO3DTask.FEW_VIEW]: 328 | make_dbir_submission( 329 | task=task, 330 | sequence_set=sequence_set, 331 | cheat_with_gt_data=False, 332 | fill_results_from_cache=False, 333 | skip_evaluation=False, 334 | submit_to_eval_ai=True, 335 | ) -------------------------------------------------------------------------------- /examples/print_co3d_stats.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import json 10 | from joblib import Parallel, delayed 11 | from collections import defaultdict 12 | from tabulate import tabulate 13 | from typing import List 14 | from collections import Counter 15 | from co3d.dataset.data_types import ( 16 | load_dataclass_jgzip, 17 | FrameAnnotation, 18 | SequenceAnnotation, 19 | ) 20 | 21 | 22 | DATASET_ROOT = os.getenv("CO3DV2_DATASET_ROOT") 23 | 24 | 25 | def _count_category(category): 26 | fa_file = os.path.join(DATASET_ROOT, category, "frame_annotations.jgz") 27 | sa_file = os.path.join(DATASET_ROOT, category, "sequence_annotations.jgz") 28 | 29 | frame_annos = load_dataclass_jgzip(fa_file, List[FrameAnnotation]) 30 | # sequence_annos = load_dataclass_jgzip(sa_file, List[SequenceAnnotation]) 31 | 32 | seq_to_frame_annos = defaultdict(list) 33 | for fa in frame_annos: 34 | seq_to_frame_annos[fa.sequence_name].append(fa) 35 | seq_to_frame_annos = dict(seq_to_frame_annos) 36 | 37 | seq_set_cnt = Counter() 38 | for _, frame_anno_list in seq_to_frame_annos.items(): 39 | seq_set, _ = frame_anno_list[0].meta["frame_type"].split("_") 40 | seq_set_cnt.update([seq_set]) 41 | seq_set_cnt.update(["all"]) 42 | 43 | return dict(seq_set_cnt) 44 | 45 | 46 | def main(): 47 | # get the category list 48 | with open(os.path.join(DATASET_ROOT, "category_to_subset_name_list.json"), "r") as f: 49 | category_to_subset_name_list = json.load(f) 50 | 51 | categories = sorted(list(category_to_subset_name_list.keys())) 52 | cat_to_n_per_set = {} 53 | 54 | counts_per_category = Parallel(n_jobs=20)( 55 | delayed(_count_category)(c) for c in categories 56 | ) 57 | 58 | cat_to_n_per_set = dict(zip(categories, counts_per_category)) 59 | 60 | seq_sets_ = list(cat_to_n_per_set[categories[0]].keys()) 61 | tab = [] 62 | for category in cat_to_n_per_set: 63 | n_per_set = [cat_to_n_per_set[category].get(set_, 0) for set_ in seq_sets_] 64 | tab.append([category, *n_per_set]) 65 | 66 | totals = [sum(t[i] for t in tab) for i in range(1, len(seq_sets_) + 1)] 67 | tab.append(["TOTAL", *totals]) 68 | 69 | print(tabulate(tab, headers=["category", *seq_sets_])) 70 | 71 | 72
| if __name__=="__main__": 73 | main() -------------------------------------------------------------------------------- /examples/show_co3d_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import logging 9 | import os 10 | import torch 11 | import math 12 | import sys 13 | import json 14 | import random 15 | 16 | from tqdm import tqdm 17 | from omegaconf import DictConfig 18 | from typing import Tuple 19 | 20 | from co3d.utils import dbir_utils 21 | from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras 22 | from pytorch3d.renderer.camera_utils import join_cameras_as_batch 23 | from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset 24 | from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import ( 25 | JsonIndexDatasetMapProviderV2 26 | ) 27 | from pytorch3d.implicitron.tools.config import expand_args_fields 28 | from pytorch3d.implicitron.models.visualization.render_flyaround import render_flyaround 29 | from pytorch3d.implicitron.dataset.dataset_base import FrameData 30 | from pytorch3d.vis.plotly_vis import plot_scene 31 | from pytorch3d.implicitron.tools.vis_utils import ( 32 | get_visdom_connection, 33 | make_depth_image, 34 | ) 35 | from pytorch3d.implicitron.tools.point_cloud_utils import ( 36 | get_rgbd_point_cloud, 37 | ) 38 | 39 | 40 | DATASET_ROOT = os.getenv("CO3DV2_DATASET_ROOT") 41 | 42 | 43 | logger = logging.getLogger(__file__) 44 | 45 | 46 | def main( 47 | output_dir: str = os.path.join(os.path.dirname(__file__), "show_co3d_dataset_files"), 48 | n_show_sequences_per_category: int = 2, 49 | visdom_env: str = "show_co3d_dataset", 50 | visualize_point_clouds: bool = False, 51 | visualize_3d_scene: bool = True, 52 | n_frames_show: int = 20, 53 | ): 54 | """ 55 | Visualizes object point clouds from the CO3D dataset. 56 | 57 | Note that the code iterates over all CO3D categories and (by default) exports 58 | 2 videos per category subset. Hence, the whole loop will run for 59 | a long time (3-4 hours). 60 | """ 61 | 62 | # make the script reproducible 63 | random.seed(30) 64 | 65 | # log info messages 66 | logging.basicConfig(level=logging.INFO) 67 | 68 | # make the output dir 69 | os.makedirs(output_dir, exist_ok=True) 70 | 71 | # get the category list 72 | if DATASET_ROOT is None: 73 | raise ValueError( 74 | "Please set the CO3DV2_DATASET_ROOT environment variable to a valid" 75 | " CO3Dv2 dataset root folder."
76 | ) 77 | with open(os.path.join(DATASET_ROOT, "category_to_subset_name_list.json"), "r") as f: 78 | category_to_subset_name_list = json.load(f) 79 | 80 | # get the visdom connection 81 | viz = get_visdom_connection() 82 | 83 | # iterate over the co3d categories 84 | categories = sorted(list(category_to_subset_name_list.keys())) 85 | for category in tqdm(categories): 86 | 87 | subset_name_list = category_to_subset_name_list[category] 88 | 89 | for subset_name in subset_name_list: 90 | 91 | # obtain the dataset 92 | expand_args_fields(JsonIndexDatasetMapProviderV2) 93 | dataset_map = JsonIndexDatasetMapProviderV2( 94 | category=category, 95 | subset_name=subset_name, 96 | test_on_train=False, 97 | only_test_set=False, 98 | load_eval_batches=True, 99 | dataset_JsonIndexDataset_args=DictConfig( 100 | {"remove_empty_masks": False, "load_point_clouds": True} 101 | ), 102 | ).get_dataset_map() 103 | 104 | train_dataset = dataset_map["train"] 105 | 106 | # select few sequences to visualize 107 | sequence_names = list(train_dataset.seq_annots.keys()) 108 | 109 | # select few sequence names 110 | show_sequence_names = random.sample( 111 | sequence_names, 112 | k=min(n_show_sequences_per_category, len(sequence_names)), 113 | ) 114 | 115 | for sequence_name in show_sequence_names: 116 | 117 | # load up a bunch of frames 118 | show_dataset_idx = [ 119 | x[2] for x in list(train_dataset.sequence_frames_in_order(sequence_name)) 120 | ] 121 | random.shuffle(show_dataset_idx) 122 | show_dataset_idx = show_dataset_idx[:n_frames_show] 123 | data_to_show = [train_dataset[i] for i in show_dataset_idx] 124 | data_to_show_collated = data_to_show[0].collate(data_to_show) 125 | 126 | # show individual frames 127 | all_ims = [] 128 | for k in ["image_rgb", "depth_map", "depth_mask", "fg_probability"]: 129 | # all_ims_now = torch.stack([d[k] for d in data_to_show]) 130 | all_ims_now = getattr(data_to_show_collated, k) 131 | if k=="depth_map": 132 | all_ims_now = make_depth_image( 133 | all_ims_now, torch.ones_like(all_ims_now) 134 | ) 135 | if k in ["depth_mask", "fg_probability", "depth_map"]: 136 | all_ims_now = all_ims_now.repeat(1, 3, 1, 1) 137 | all_ims.append(all_ims_now.clamp(0.0, 1.0)) 138 | all_ims = torch.cat(all_ims, dim=2) 139 | title = f"random_frames" 140 | viz.images( 141 | all_ims, nrow=all_ims.shape[-1], env=visdom_env, 142 | win=title, opts={"title": title}, 143 | ) 144 | 145 | if visualize_3d_scene: 146 | # visualize a 3d plotly plot of the scene 147 | camera_show = data_to_show_collated.camera 148 | pointcloud_show = get_rgbd_point_cloud( 149 | data_to_show_collated.camera, 150 | data_to_show_collated.image_rgb, 151 | data_to_show_collated.depth_map, 152 | (data_to_show_collated.fg_probability > 0.5).float(), 153 | mask_points=True, 154 | ) 155 | viz.plotlyplot( 156 | plot_scene( 157 | { 158 | sequence_name: { 159 | "camera":camera_show, 160 | "point_cloud": pointcloud_show 161 | } 162 | } 163 | ), 164 | env=visdom_env, 165 | win="3d_scene", 166 | ) 167 | 168 | if not visualize_point_clouds: 169 | continue 170 | 171 | for load_dataset_pointcloud in [True, False]: 172 | 173 | model = PointcloudRenderingModel( 174 | train_dataset, 175 | sequence_name, 176 | device="cuda:0", 177 | load_dataset_pointcloud=load_dataset_pointcloud, 178 | ) 179 | 180 | video_path = os.path.join( 181 | output_dir, 182 | category, 183 | f"{subset_name}_l{load_dataset_pointcloud}", 184 | ) 185 | 186 | os.makedirs(os.path.dirname(video_path), exist_ok=True) 187 | 188 | logger.info(f"Rendering rotating video 
{video_path}") 189 | 190 | render_flyaround( 191 | train_dataset, 192 | sequence_name, 193 | model, 194 | video_path, 195 | n_flyaround_poses=40, 196 | fps=20, 197 | trajectory_type="circular_lsq_fit", 198 | max_angle=2 * math.pi, 199 | trajectory_scale=1.5, 200 | scene_center=(0.0, 0.0, 0.0), 201 | up=(0.0, -1.0, 0.0), 202 | traj_offset=1.0, 203 | n_source_views=1, 204 | visdom_show_preds=True, 205 | visdom_environment=visdom_env, 206 | visualize_preds_keys=( 207 | "images_render", 208 | "masks_render", 209 | "depths_render", 210 | ), 211 | ) 212 | 213 | 214 | class PointcloudRenderingModel(torch.nn.Module): 215 | def __init__( 216 | self, 217 | train_dataset: JsonIndexDataset, 218 | sequence_name: str, 219 | render_size: Tuple[int, int] = [400, 400], 220 | device = None, 221 | load_dataset_pointcloud: bool = False, 222 | ): 223 | super().__init__() 224 | self._render_size = render_size 225 | self._pointcloud = dbir_utils.get_sequence_pointcloud( 226 | train_dataset, 227 | sequence_name, 228 | load_dataset_pointcloud=load_dataset_pointcloud, 229 | ).to(device) 230 | 231 | def forward( 232 | self, 233 | camera: CamerasBase, 234 | **kwargs, 235 | ): 236 | render = dbir_utils.render_point_cloud( 237 | camera[[0]], 238 | self._render_size, 239 | self._pointcloud, 240 | point_radius=0.01, 241 | ) 242 | return { 243 | "images_render": render.image_render, 244 | "masks_render": render.mask_render, 245 | "depths_render": render.depth_render, 246 | } 247 | 248 | 249 | if __name__=="__main__": 250 | main() 251 | -------------------------------------------------------------------------------- /grid.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/co3d/eb51d7583c56ff23dc918d9deafee50f4d8178c3/grid.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | visdom 2 | tqdm 3 | requests 4 | h5py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import setuptools 9 | 10 | setuptools.setup( 11 | name="co3d", 12 | version="2.1.0", 13 | author="FAIR", 14 | author_email="dnovotny@fb.com", 15 | packages=setuptools.find_packages(exclude=["tests", "examples"]), 16 | license="LICENSE", 17 | description="Common Objects in 3D codebase", 18 | long_description=open("README.md").read(), 19 | install_requires=[ 20 | "numpy", 21 | "Pillow", 22 | "requests", 23 | "tqdm", 24 | "plyfile", 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /tests/test_challenge_evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import unittest 10 | import numpy as np 11 | import tempfile 12 | import torch 13 | 14 | from pytorch3d.renderer.cameras import look_at_view_transform, PerspectiveCameras 15 | from pytorch3d.implicitron.dataset.json_index_dataset import FrameData 16 | from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch 17 | from pytorch3d.implicitron.models.base_model import ImplicitronRender 18 | 19 | from co3d.challenge.io import ( 20 | load_mask, 21 | store_mask, 22 | load_depth, 23 | store_depth, 24 | load_image, 25 | store_image, 26 | load_1bit_png_mask, 27 | store_1bit_png_mask, 28 | store_rgbda_frame, 29 | load_rgbda_frame, 30 | ) 31 | from co3d.challenge.utils import get_result_directory_file_names, evaluate_file_folders 32 | from co3d.challenge.metric_utils import eval_one 33 | from co3d.challenge.data_types import RGBDAFrame 34 | 35 | 36 | class TestIO(unittest.TestCase): 37 | def test_save_load(self): 38 | H = 100 39 | W = 200 40 | with tempfile.TemporaryDirectory() as tmpd: 41 | for data_type in ["image", "mask", "depth", "depth_mask"]: 42 | with self.subTest(data_type): 43 | for _ in range(10): 44 | C = {"depth_mask": 1, "mask": 1, "depth": 1, "image": 3}[data_type] 45 | data = np.random.uniform(size=(C, H, W)) 46 | if data_type in ("mask", "depth_mask"): 47 | data = (data > 0.5).astype(np.float32) 48 | if C == 1: 49 | data = data[0] 50 | load_fun, store_fun = { 51 | "mask": (load_mask, store_mask), 52 | "depth": (load_depth, store_depth), 53 | "image": (load_image, store_image), 54 | "depth_mask": (load_1bit_png_mask, store_1bit_png_mask), 55 | }[data_type] 56 | fl = os.path.join(tmpd, f"{data_type}.png") 57 | store_fun(data, fl) 58 | data_ = load_fun(fl) 59 | self.assertTrue(np.allclose(data, data_, atol=1 / 255)) 60 | 61 | 62 | class TestMetricUtils(unittest.TestCase): 63 | def test_against_eval_batch(self): 64 | H = 100 65 | W = 200 66 | for _ in range(20): 67 | implicitron_render = _random_implicitron_render(2, H, W, "cpu") 68 | 69 | for has_depth_mask in [True, False]: 70 | 71 | frame_data = _random_frame_data(2, H, W, "cpu") 72 | if not has_depth_mask: 73 | frame_data.depth_mask = None 74 | 75 | eval_batch_result = eval_batch( 76 | frame_data, 77 | implicitron_render, 78 | ) 79 | 80 | pred_rgbda = RGBDAFrame( 81 | image=implicitron_render.image_render[0].numpy(), 82 | mask=implicitron_render.mask_render[0].numpy(), 83 | depth=implicitron_render.depth_render[0].numpy(), 84 | ) 85 | 86 | gt_rgbda = RGBDAFrame( 87 | image=frame_data.image_rgb[0].numpy(), 88 | mask=frame_data.fg_probability[0].numpy(), 89 | depth=frame_data.depth_map[0].numpy(), 90 | depth_mask=frame_data.depth_mask[0].numpy() if has_depth_mask else None, 91 | ) 92 | 93 | eval_one_result = eval_one( 94 | pred=pred_rgbda, 95 | target=gt_rgbda, 96 | ) 97 | 98 | # print("eval_batch; eval_one") 99 | for k in ["iou", "psnr_fg", "psnr", "depth_abs_fg"]: 100 | self.assertTrue( 101 | np.allclose(eval_batch_result[k], eval_one_result[k], atol=1e-5) 102 | ) 103 | # print(f"{k:15s}: {eval_batch_result[k]:1.3e} - {eval_one_result[k]:1.3e}") 104 | 105 | 106 | class TestEvalScript(unittest.TestCase): 107 | def test_fake_data(self): 108 | N = 30 109 | H = 120 110 | W = 200 111 | with tempfile.TemporaryDirectory() as tmp_pred, tempfile.TemporaryDirectory() as tmp_gt: 112 | _generate_random_submission_data(tmp_pred, 
N, H, W) 113 | _generate_random_submission_data(tmp_gt, N, H, W) 114 | avg_result, per_example_result = evaluate_file_folders(tmp_pred, tmp_gt) 115 | metrics = list(avg_result.keys()) 116 | for m in metrics: 117 | self.assertTrue( 118 | np.allclose( 119 | np.mean([r[m] for r in per_example_result]), 120 | avg_result[m], 121 | ) 122 | ) 123 | self.assertTrue(len(per_example_result) == N) 124 | 125 | 126 | def test_wrong_fake_data(self): 127 | N = 30 128 | H = 120 129 | W = 200 130 | 131 | # different number of eval/test examples 132 | for N_pred in [N - 2, N + 2]: 133 | with tempfile.TemporaryDirectory() as tmp_pred, tempfile.TemporaryDirectory() as tmp_gt: 134 | _generate_random_submission_data(tmp_pred, N_pred, H, W) 135 | _generate_random_submission_data(tmp_gt, N, H, W) 136 | msg = ( 137 | "Unexpected submitted evaluation examples" 138 | if N_pred > N 139 | else "There are missing evaluation examples" 140 | ) 141 | with self.assertRaisesRegex(ValueError, msg): 142 | evaluate_file_folders(tmp_pred, tmp_gt) 143 | 144 | # some eval examples missing depth/image 145 | with tempfile.TemporaryDirectory() as tmp_pred, tempfile.TemporaryDirectory() as tmp_gt: 146 | _generate_random_submission_data(tmp_pred, N_pred, H, W) 147 | _generate_random_submission_data(tmp_gt, N, H, W) 148 | pred_file_names = get_result_directory_file_names(tmp_pred) 149 | first_ex = pred_file_names[list(pred_file_names.keys())[0]] 150 | for file_type in ["depth", "image"]: 151 | os.remove(first_ex + f"_{file_type}.png") 152 | with self.assertRaisesRegex( 153 | ValueError, 154 | "Some evaluation examples are incomplete", 155 | ): 156 | evaluate_file_folders(tmp_pred, tmp_gt) 157 | 158 | 159 | def _generate_random_submission_data(folder, N, H, W): 160 | for example_num in range(N): 161 | root_path = os.path.join(folder, f"example_{example_num}") 162 | store_rgbda_frame(_random_rgbda_frame(H, W), root_path) 163 | 164 | 165 | def _random_implicitron_render( 166 | N: int, 167 | H: int, 168 | W: int, 169 | device: torch.device, 170 | ): 171 | mask = _random_input_tensor(N, 1, H, W, True, device) 172 | return ImplicitronRender( 173 | depth_render=_random_input_tensor(N, 1, H, W, False, device), 174 | image_render=_random_input_tensor(N, 3, H, W, False, device) * mask, 175 | mask_render=mask, 176 | ) 177 | 178 | 179 | def _random_rgbda_frame(H: int, W: int): 180 | return RGBDAFrame( 181 | image=np.random.uniform(size=(3, H, W)).astype(np.float32), 182 | mask=(np.random.uniform(size=(1, H, W)) > 0.5).astype(np.float32), 183 | depth=np.random.uniform(size=(1, H, W)).astype(np.float32) + 0.1, 184 | ) 185 | 186 | 187 | def _random_frame_data( 188 | N: int, 189 | H: int, 190 | W: int, 191 | device: torch.device, 192 | ): 193 | R, T = look_at_view_transform(azim=torch.rand(N) * 360) 194 | cameras = PerspectiveCameras(R=R, T=T, device=device) 195 | depth_map_common = ( 196 | torch.stack( 197 | torch.meshgrid( 198 | torch.linspace(0.0, 1.0, H), 199 | torch.linspace(0.0, 1.0, W), 200 | ) 201 | ).mean(dim=0) 202 | + 0.1 203 | ) 204 | depth_map = _random_input_tensor(N, 1, H, W, False, device) + depth_map_common[None] 205 | random_args = { 206 | "frame_number": torch.arange(N), 207 | "frame_timestamp": torch.linspace(0.0, 1.0, N), 208 | "sequence_category": ["random"] * N, 209 | "camera": cameras, 210 | "fg_probability": _random_input_tensor(N, 1, H, W, True, device), 211 | "depth_map": depth_map, 212 | "mask_crop": torch.ones(N, 1, H, W, device=device), 213 | "depth_mask": _random_input_tensor(N, 1, H, W, True, device), 214 | 
"sequence_name": ["sequence"] * N, 215 | "image_rgb": _random_input_tensor(N, 3, H, W, False, device), 216 | "frame_type": ["test_unseen", *(["test_known"] * (N - 1))], 217 | } 218 | return FrameData(**random_args) 219 | 220 | 221 | def _random_input_tensor( 222 | N: int, 223 | C: int, 224 | H: int, 225 | W: int, 226 | is_binary: bool, 227 | device: torch.device, 228 | ) -> torch.Tensor: 229 | T = torch.rand(N, C, H, W, device=device) 230 | if is_binary: 231 | T = (T > 0.5).float() 232 | return T 233 | 234 | 235 | if __name__ == "__main__": 236 | unittest.main() 237 | -------------------------------------------------------------------------------- /tests/test_dataset_visualize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import unittest 10 | import torch 11 | import torchvision 12 | 13 | from visdom import Visdom 14 | 15 | _CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "") 16 | 17 | 18 | from pytorch3d.implicitron.tools.point_cloud_utils import render_point_cloud_pytorch3d 19 | from pytorch3d.implicitron.tools.config import expand_args_fields 20 | from pytorch3d.implicitron.dataset.visualize import get_implicitron_sequence_pointcloud 21 | from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset 22 | from pytorch3d.vis.plotly_vis import plot_scene 23 | 24 | 25 | class TestDatasetVisualize(unittest.TestCase): 26 | def setUp(self): 27 | torch.manual_seed(42) 28 | category = "skateboard" 29 | dataset_root = _CO3DV2_DATASET_ROOT 30 | frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") 31 | sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") 32 | self.image_size = 256 33 | expand_args_fields(JsonIndexDataset) 34 | self.datasets = { 35 | "simple": JsonIndexDataset( 36 | frame_annotations_file=frame_file, 37 | sequence_annotations_file=sequence_file, 38 | dataset_root=dataset_root, 39 | image_height=self.image_size, 40 | image_width=self.image_size, 41 | box_crop=True, 42 | load_point_clouds=True, 43 | ), 44 | "nonsquare": JsonIndexDataset( 45 | frame_annotations_file=frame_file, 46 | sequence_annotations_file=sequence_file, 47 | dataset_root=dataset_root, 48 | image_height=self.image_size, 49 | image_width=self.image_size // 2, 50 | box_crop=True, 51 | load_point_clouds=True, 52 | ), 53 | "nocrop": JsonIndexDataset( 54 | frame_annotations_file=frame_file, 55 | sequence_annotations_file=sequence_file, 56 | dataset_root=dataset_root, 57 | image_height=self.image_size, 58 | image_width=self.image_size // 2, 59 | box_crop=False, 60 | load_point_clouds=True, 61 | ), 62 | "nocrop2": JsonIndexDataset( 63 | frame_annotations_file=frame_file, 64 | sequence_annotations_file=sequence_file, 65 | dataset_root=dataset_root, 66 | image_height=self.image_size // 2, 67 | image_width=self.image_size, 68 | box_crop=False, 69 | load_point_clouds=True, 70 | ), 71 | } 72 | self.visdom = Visdom() 73 | if not self.visdom.check_connection(): 74 | print("Visdom server not running! 
Disabling visdom visualisations.") 75 | self.visdom = None 76 | 77 | def _render_one_pointcloud(self, point_cloud, cameras, render_size): 78 | (_image_render, _, _) = render_point_cloud_pytorch3d( 79 | cameras, 80 | point_cloud, 81 | render_size=render_size, 82 | point_radius=2e-2, 83 | topk=10, 84 | bg_color=0.0, 85 | bin_size=0, 86 | ) 87 | return _image_render.clamp(0.0, 1.0) 88 | 89 | def test_one(self): 90 | """Test dataset visualisation.""" 91 | point_clouds = {} 92 | for max_frames in (16,): 93 | for load_dataset_point_cloud in (True, False): 94 | for dataset_key in self.datasets: 95 | point_cloud, cameras = self._gen_and_render_pointcloud( 96 | max_frames, load_dataset_point_cloud, dataset_key 97 | ) 98 | test_name = f"{max_frames}_{load_dataset_point_cloud}_{dataset_key}" 99 | point_clouds[test_name] = point_cloud 100 | 101 | if self.visdom is not None: 102 | plotlyplot = plot_scene( 103 | { 104 | "point_clouds": { 105 | "cameras": cameras, 106 | **point_clouds, 107 | } 108 | }, 109 | camera_scale=1.0, 110 | pointcloud_max_points=10000, 111 | pointcloud_marker_size=1.0, 112 | ) 113 | self.visdom.plotlyplot( 114 | plotlyplot, 115 | env="test_dataset_visualize", 116 | win=f"pcl", 117 | ) 118 | 119 | def _gen_and_render_pointcloud( 120 | self, max_frames, load_dataset_point_cloud, dataset_key 121 | ): 122 | dataset = self.datasets[dataset_key] 123 | # load the point cloud of the first sequence 124 | sequence_show = list(dataset.seq_annots.keys())[0] 125 | device = torch.device("cuda:0") 126 | 127 | point_cloud, sequence_frame_data = get_implicitron_sequence_pointcloud( 128 | dataset, 129 | sequence_name=sequence_show, 130 | mask_points=True, 131 | max_frames=max_frames, 132 | num_workers=10, 133 | load_dataset_point_cloud=load_dataset_point_cloud, 134 | ) 135 | 136 | # render on gpu 137 | point_cloud = point_cloud.to(device) 138 | cameras = sequence_frame_data.camera.to(device) 139 | 140 | # render the point_cloud from the viewpoint of loaded cameras 141 | images_render = torch.cat( 142 | [ 143 | self._render_one_pointcloud( 144 | point_cloud, 145 | cameras[frame_i], 146 | ( 147 | dataset.image_height, 148 | dataset.image_width, 149 | ), 150 | ) 151 | for frame_i in range(len(cameras)) 152 | ] 153 | ).cpu() 154 | images_gt_and_render = torch.cat( 155 | [ 156 | sequence_frame_data.image_rgb, 157 | images_render, 158 | (sequence_frame_data.image_rgb-images_render).abs(), 159 | ], dim=3 160 | ) 161 | 162 | imfile = os.path.join( 163 | os.path.split(os.path.abspath(__file__))[0], 164 | f"test_dataset_visualize" 165 | + f"_max_frames={max_frames}" 166 | + f"_load_pcl={load_dataset_point_cloud}" 167 | + f"_dataset_key={dataset_key}.png", 168 | ) 169 | print(f"Exporting image {imfile}.") 170 | torchvision.utils.save_image(images_gt_and_render, imfile, nrow=2) 171 | 172 | if self.visdom is not None: 173 | test_name = f"{max_frames}_{load_dataset_point_cloud}_{dataset_key}" 174 | self.visdom.images( 175 | images_gt_and_render, 176 | env="test_dataset_visualize", 177 | win=f"pcl_renders_{test_name}", 178 | opts={"title": f"pcl_renders_{test_name}"}, 179 | ) 180 | 181 | return point_cloud, cameras 182 | 183 | 184 | if __name__ == "__main__": 185 | unittest.main() -------------------------------------------------------------------------------- /tests/test_types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import dataclasses 9 | from typing import Dict, List, NamedTuple, Tuple 10 | import unittest 11 | 12 | from co3d.dataset import data_types as types 13 | from co3d.dataset.data_types import FrameAnnotation 14 | 15 | 16 | class TestDatasetTypes(unittest.TestCase): 17 | def setUp(self): 18 | self.entry = FrameAnnotation( 19 | frame_number=23, 20 | sequence_name="1", 21 | frame_timestamp=1.2, 22 | image=types.ImageAnnotation(path="/tmp/1.jpg", size=(224, 224)), 23 | mask=types.MaskAnnotation(path="/tmp/1.png", mass=42.0), 24 | viewpoint=types.ViewpointAnnotation( 25 | R=( 26 | (1, 0, 0), 27 | (1, 0, 0), 28 | (1, 0, 0), 29 | ), 30 | T=(0, 0, 0), 31 | principal_point=(100, 100), 32 | focal_length=(200, 200), 33 | ), 34 | ) 35 | 36 | def test_asdict_rec(self): 37 | first = [dataclasses.asdict(self.entry)] 38 | second = types._asdict_rec([self.entry]) 39 | self.assertEqual(first, second) 40 | 41 | def test_parsing(self): 42 | """Test that we handle collections enclosing dataclasses.""" 43 | 44 | class NT(NamedTuple): 45 | annot: FrameAnnotation 46 | 47 | dct = dataclasses.asdict(self.entry) 48 | 49 | parsed = types._dataclass_from_dict(dct, FrameAnnotation) 50 | self.assertEqual(parsed, self.entry) 51 | 52 | # namedtuple 53 | parsed = types._dataclass_from_dict(NT(dct), NT) 54 | self.assertEqual(parsed.annot, self.entry) 55 | 56 | # tuple 57 | parsed = types._dataclass_from_dict((dct,), Tuple[FrameAnnotation]) 58 | self.assertEqual(parsed, (self.entry,)) 59 | 60 | # list 61 | parsed = types._dataclass_from_dict( 62 | [ 63 | dct, 64 | ], 65 | List[FrameAnnotation], 66 | ) 67 | self.assertEqual( 68 | parsed, 69 | [ 70 | self.entry, 71 | ], 72 | ) 73 | 74 | # dict 75 | parsed = types._dataclass_from_dict({"k": dct}, Dict[str, FrameAnnotation]) 76 | self.assertEqual(parsed, {"k": self.entry}) 77 | --------------------------------------------------------------------------------