├── .github ├── ISSUE_TEMPLATE │ ├── bug-issue-report.md │ └── feature_request.md └── workflows │ └── static.yml ├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── CITATION.cff ├── LICENSE ├── README.md ├── browser ├── app.py └── templates │ └── index.html ├── database ├── mesh-templates │ └── cat-pikachu-remeshed.obj └── vid_data │ ├── ama-bouncing-4v.txt │ ├── ama-bouncing.txt │ ├── ama-samba-4v.txt │ ├── ama-samba.txt │ ├── car-turnaround-2.txt │ ├── car-turnaround.txt │ ├── cat-85.txt │ ├── cat-pikachu-0.txt │ ├── cat-pikachu.txt │ ├── dog-98.txt │ ├── dog-robolounge.txt │ ├── human-48.txt │ ├── human-cap.txt │ ├── room.txt │ ├── shiba-haru.txt │ ├── squirrel-baseball.txt │ └── squirrel.txt ├── docs ├── .gitignore ├── Makefile ├── README.md ├── env_min.yml ├── make.bat ├── source │ ├── _static │ │ ├── images │ │ │ ├── camera_annot.png │ │ │ └── visflo-00081.jpg │ │ ├── media_resized │ │ │ ├── car-turnaround-2-anno.mp4 │ │ │ ├── car-turnaround-2-proxy.mp4 │ │ │ ├── car-turnaround-2.mp4 │ │ │ ├── car-turnaround-2_collage.mp4 │ │ │ ├── car-turnaround_bev-120-xyz.mp4 │ │ │ ├── car-turnaround_bev-120.mp4 │ │ │ ├── car-turnaround_bev.mp4 │ │ │ ├── car-turnaround_ref-xyz.mp4 │ │ │ ├── car-turnaround_ref.mp4 │ │ │ ├── car-turnaround_turntable-120-xyz.mp4 │ │ │ ├── car-turnaround_turntable-120.mp4 │ │ │ ├── car-turnaround_turntable.mp4 │ │ │ ├── cat-85-80_ref-xyz.mp4 │ │ │ ├── cat-85.mp4 │ │ │ ├── cat-pikachu-0-comp_bev-xyz.mp4 │ │ │ ├── cat-pikachu-0-comp_bev.mp4 │ │ │ ├── cat-pikachu-0-proxy.mp4 │ │ │ ├── cat-pikachu-0.mp4 │ │ │ ├── cat-pikachu-0_collage.mp4 │ │ │ ├── cat-pikachu-0_ref-xyz.mp4 │ │ │ ├── cat-pikachu-0_ref.mp4 │ │ │ ├── cat-pikachu-0_turntable-xyz.mp4 │ │ │ ├── cat-pikachu-0_turntable.mp4 │ │ │ ├── cat-pikachu-7.mp4 │ │ │ ├── cat-pikachu-7_collage.mp4 │ │ │ ├── cat-pikachu-8_ref-xyz.mp4 │ │ │ ├── cat-pikachu-8_ref.mp4 │ │ │ ├── cat-pikachu-8_turntable-120-xyz.mp4 │ │ │ ├── cat-pikachu-8_turntable-120.mp4 │ │ │ ├── comp_elev.mp4 │ │ │ ├── dog-98-0_ref-xyz.mp4 │ │ │ ├── dog-98.mp4 │ │ │ ├── dog-dualrig-fgbg000-xyz.mp4 │ │ │ ├── dog-dualrig-fgbg000.mp4 │ │ │ ├── dog-robolounge_collage.mp4 │ │ │ ├── finch.mp4 │ │ │ ├── finch_collage.mp4 │ │ │ ├── finch_ref-xyz.mp4 │ │ │ ├── human-48-0_ref-xyz.mp4 │ │ │ ├── human-48-0_ref.mp4 │ │ │ ├── human-48-0_turntable-120-xyz.mp4 │ │ │ ├── human-48-0_turntable-120.mp4 │ │ │ ├── human-48-reanimate-8-xyz.mp4 │ │ │ ├── human-48-reanimate-8.mp4 │ │ │ ├── human-48.mp4 │ │ │ ├── human-cap-3-xyz.mp4 │ │ │ ├── human-cap-3.mp4 │ │ │ ├── human-cap-3_collage.mp4 │ │ │ ├── penguin-1-xyz.mp4 │ │ │ ├── penguin-2_collage.mp4 │ │ │ ├── penguin.mp4 │ │ │ ├── shiba-haru-6.mp4 │ │ │ ├── shiba-haru-7-xyz.mp4 │ │ │ ├── shiba-haru-7.mp4 │ │ │ ├── shiba-haru-7_collage.mp4 │ │ │ ├── squirrel-xyz.mp4 │ │ │ ├── squirrel.mp4 │ │ │ └── squirrel_collage.mp4 │ │ └── meshes │ │ │ ├── car-turnaround-2-canonical-prealign.obj │ │ │ ├── car-turnaround-2-canonical.obj │ │ │ ├── car-turnaround-2-mesh.obj │ │ │ ├── car-turnaround-2-proxy.obj │ │ │ ├── cat-pikachu-0-bone.obj │ │ │ ├── cat-pikachu-0-mesh.obj │ │ │ ├── cat-pikachu-0-proxy.obj │ │ │ ├── cat-pikachu-bone.obj │ │ │ ├── cat-pikachu-mesh.obj │ │ │ ├── cat-pikachu-proxy.obj │ │ │ ├── human-48-0-mesh-0000.obj │ │ │ ├── human-48-0-mesh.obj │ │ │ ├── human-48-bone.obj │ │ │ └── human-48-proxy.obj │ ├── api_docs │ │ ├── index.rst │ │ └── modules.rst │ ├── conf.py │ ├── data_models.rst │ ├── get_started │ │ └── index.rst │ ├── index.rst │ ├── obj2glb.py │ ├── qa.rst │ ├── resize_vids.py │ └── tutorials │ │ ├── 
arbitrary_video.rst │ │ ├── category_model.rst │ │ ├── index.rst │ │ ├── multi_video_cat.rst │ │ ├── preprocessing.rst │ │ └── single_video_cat.rst └── template │ ├── module.rst_t │ ├── package.rst_t │ └── toc.rst_t ├── environment.yml ├── lab4d ├── __init__.py ├── config.py ├── config_omega.py ├── dataloader │ ├── __init__.py │ ├── data_utils.py │ └── vidloader.py ├── engine │ ├── __init__.py │ ├── model.py │ ├── train_utils.py │ └── trainer.py ├── export.py ├── nnutils │ ├── __init__.py │ ├── appearance.py │ ├── base.py │ ├── deformable.py │ ├── embedding.py │ ├── feature.py │ ├── intrinsics.py │ ├── multifields.py │ ├── nerf.py │ ├── pose.py │ ├── skinning.py │ ├── time.py │ ├── visibility.py │ └── warping.py ├── reanimate.py ├── render.py ├── tests │ ├── hat_map.py │ ├── test_gpu_map.py │ ├── test_ops.py │ └── utils.py ├── third_party │ ├── nvp.py │ └── quaternion │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── add_gcc_cuda.sh │ │ ├── backend.py │ │ ├── mat3x3.py │ │ ├── quaternion.py │ │ ├── setup.py │ │ └── src │ │ ├── bindings.cpp │ │ ├── matinv.cu │ │ ├── matinv.h │ │ ├── quaternion.cu │ │ └── quaternion.h ├── train.py └── utils │ ├── __init__.py │ ├── cam_utils.py │ ├── camera_utils.py │ ├── decorator.py │ ├── geom_utils.py │ ├── gpu_utils.py │ ├── io.py │ ├── loss_utils.py │ ├── numpy_utils.py │ ├── profile_utils.py │ ├── quat_transform.py │ ├── render_utils.py │ ├── skel_utils.py │ ├── torch_utils.py │ ├── transforms.py │ └── vis_utils.py ├── media ├── logo.png └── teaser.gif ├── preprocess ├── __init__.py ├── libs │ ├── __init__.py │ ├── geometry.py │ ├── io.py │ ├── torch_models.py │ └── utils.py ├── scripts │ ├── __init__.py │ ├── camera_registration.py │ ├── canonical_registration.py │ ├── compute_diff.py │ ├── crop.py │ ├── depth.py │ ├── download.py │ ├── extract_dinov2.py │ ├── extract_frames.py │ ├── manual_cameras.py │ ├── tsdf_fusion.py │ └── write_config.py └── third_party │ ├── fusion.py │ ├── vcnplus │ ├── compute_flow.py │ ├── compute_flow.sh │ ├── flowutils │ │ ├── __init__.py │ │ └── flowlib.py │ ├── frame_filter.py │ └── models │ │ ├── VCNplus.py │ │ ├── __init__.py │ │ ├── conv4d.py │ │ ├── det.py │ │ ├── det_losses.py │ │ ├── det_utils.py │ │ ├── feature_extraction.py │ │ ├── inference.py │ │ ├── networks │ │ ├── .gitignore │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── DCN │ │ │ │ ├── __init__.py │ │ │ │ ├── dcn_v2.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── dcn_v2.h │ │ │ │ │ └── vision.cpp │ │ │ │ ├── testcpu.py │ │ │ │ └── testcuda.py │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── make.sh │ │ │ └── setup.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ └── resnet_dcn.py │ │ └── submodule.py │ └── viewpoint │ ├── __init__.py │ ├── configs │ └── cse │ │ ├── Base-DensePose-RCNN-FPN-Human.yaml │ │ ├── Base-DensePose-RCNN-FPN.yaml │ │ ├── densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml │ │ └── densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml │ ├── cselib.py │ └── dp_viewpoint.py ├── scripts ├── create_collage.py ├── download_unzip.sh ├── install-deps.sh ├── render_intermediate.py ├── run_crop_all.py ├── run_preprocess.py ├── 
run_rendering_parallel.py ├── train.sh ├── zip_dataset.py └── zip_logdir.py └── setup.py /.github/ISSUE_TEMPLATE/bug-issue-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug/issue report 3 | about: Issues running the code / bug report to help us improve 4 | title: "[Bug/issue]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the issue is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | name: Build Sphinx docs and Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: false 16 | 17 | jobs: 18 | build-deploy: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v3 23 | with: 24 | submodules: recursive # Ensures submodules are checked out 25 | 26 | - name: Update Submodules 27 | run: | 28 | cd ./docs 29 | git submodule update --init --recursive 30 | 31 | - name: Setup Miniconda 32 | uses: conda-incubator/setup-miniconda@v2 33 | with: 34 | python-version: 3.9 35 | mamba-version: "*" 36 | channels: conda-forge,defaults 37 | channel-priority: true 38 | activate-environment: lab4d 39 | environment-file: docs/env_min.yml 40 | 41 | - name: Build Docs 42 | shell: bash -l {0} 43 | run: | 44 | conda activate lab4d 45 | cd ./docs 46 | sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/ 47 | python source/obj2glb.py 48 | make html 49 | 50 | - name: Setup Pages 51 | uses: actions/configure-pages@v3 52 | 53 | - name: Upload artifact 54 | uses: actions/upload-pages-artifact@v1 55 | with: 56 | # Upload the pages 57 | path: './docs/build/html' 58 | 59 | - name: Deploy to GitHub Pages 60 | id: deployment 61 | uses: actions/deploy-pages@v2 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | projects 2 | viewer 3 | run.sh 4 | run-long.sh 5 | /database/processed 6 | /database/configs 7 | /database/raw 8 | /logdir 9 | /tmp 10 | 11 | lab4d.egg-info 12 | __pycache__/ 13 | *.pth 14 | *.ckpt 15 | 16 | preprocess/third_party/vcnplus/vcn_rob.pth 17 | preprocess/third_party/viewpoint/human.pth 18 | preprocess/third_party/viewpoint/quad.pth 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "preprocess/third_party/detectron2"] 2 | path = preprocess/third_party/detectron2 3 | url = https://github.com/facebookresearch/detectron2 4 | ignore = dirty 5 | [submodule "preprocess/third_party/Track-Anything"] 6 | path = preprocess/third_party/Track-Anything 7 | url = https://github.com/gengshan-y/Track-Anything 8 | [submodule "docs/pytorch_sphinx_theme"] 9 | path = docs/pytorch_sphinx_theme 10 | url = https://github.com/gengshan-y/pytorch_sphinx_theme 11 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/*.npy": true, 4 | "**/*.jpg": true, 5 | "**/*.mp4": true, 6 | "**/.git/objects/**": true, 7 | "**/.git/subtree-cache/**": true, 8 | "**/node_modules/*/**": true, 9 | "**/*.log": true, 10 | "database/processed/**": true, 11 | "logdir/**": true, 12 | "tmp/**": true, 13 | }, 14 | "[python]": { 15 | "editor.defaultFormatter": "ms-python.black-formatter" 16 | }, 17 | "python.formatting.provider": "none", 18 | "liveServer.settings.root": "docs/build/" 19 | } 
-------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: Lab4d - A framework for in-the-wild 4D reconstruction from monocular videos 3 | message: 'If you use this software, please cite it as below.' 4 | type: software 5 | authors: 6 | - family-names: Yang 7 | given-names: Gengshan 8 | - family-names: Tan 9 | given-names: Jeff 10 | - family-names: Lyons 11 | given-names: Alex 12 | - family-names: Peri 13 | given-names: Neehar 14 | - family-names: Ramanan 15 | given-names: Deva 16 | url: 'https://github.com/lab4d-org/lab4d' 17 | license: MIT 18 | version: 0.0.0 19 | date-released: '2023-06-30' 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Gengshan Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | logo 4 | 5 |

6 | 7 | # Lab4D 8 | **[[Docs & Tutorials](https://lab4d-org.github.io/lab4d/)]** 9 | **[[Data & Checkpoints](https://lab4d-org.github.io/lab4d/data_models.html)]** 10 | 11 | *This is an alpha release and the APIs are subject to change. Please provide feedback and report bugs via github issues. Thank you for your support.* 12 | 13 | ## About 14 | **Lab4D** is a framework for 4D reconstruction from monocular videos. The software is licensed under the MIT license. 15 |

16 | 17 | logo 18 | 19 |

20 | 21 | 22 | ## TODOs 23 | - [ ] web viewer (see [PPR branch](https://github.com/gengshan-y/ppr)) 24 | - [ ] evaluation (see [PPR branch](https://github.com/gengshan-y/ppr)) and benchmarks 25 | - [ ] multi-view reconstruction 26 | - [ ] feedforward models (see [DASR](https://github.com/jefftan969/dasr)) 27 | 28 | ## Acknowledgement 29 | - Our pre-processing pipeline is built upon the following open-sourced repos: 30 | - Segmentation: [Track-Anything](https://github.com/gaomingqi/Track-Anything), [Grounding-DINO](https://github.com/IDEA-Research/GroundingDINO) 31 | - Feature & correspondence: [DensePose-CSE](https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/projects/DensePose/doc/DENSEPOSE_CSE.md), [DINOv2](https://github.com/facebookresearch/dinov2), [VCNPlus](https://github.com/gengshan-y/rigidmask) 32 | - Depth: [ZoeDepth](https://github.com/isl-org/ZoeDepth) 33 | - Camera: [BANMo-viewpoint](https://github.com/facebookresearch/banmo) 34 | - We use [dqtorch](https://github.com/MightyChaos/dqtorch) for efficient rotation operations 35 | - We thank [@mjlbach](https://github.com/mjlbach), [@alexanderbergman7](https://github.com/alexanderbergman7), and [@terrancewang](https://github.com/terrancewang) for testing and feedback 36 | - We thank [@jasonyzhang](https://github.com/jasonyzhang), [@MightyChaos](https://github.com/MightyChaos), [@JudyYe](https://github.com/JudyYe), and [@andrewsonga](https://github.com/andrewsonga) for feedback 37 | 38 | If you use this project for your research, please consider citing the following papers. 39 | 40 | For building deformable object models, cite: 41 |
42 | 43 | ``` 44 | @inproceedings{yang2022banmo, 45 | title={BANMo: Building Animatable 3D Neural Models from Many Casual Videos}, 46 | author={Yang, Gengshan and Vo, Minh and Neverova, Natalia and Ramanan, Deva and Vedaldi, Andrea and Joo, Hanbyul}, 47 | booktitle = {CVPR}, 48 | year={2022} 49 | } 50 | ``` 51 |
52 | 53 | For building category body and pose models, cite: 54 |
55 | 56 | ``` 57 | @inproceedings{yang2023rac, 58 | title={Reconstructing Animatable Categories from Videos}, 59 | author={Yang, Gengshan and Wang, Chaoyang and Reddy, N. Dinesh and Ramanan, Deva}, 60 | booktitle = {CVPR}, 61 | year={2023} 62 | } 63 | ``` 64 |
65 | 66 | For object-scene reconstruction and extreme view synthesis, cite: 67 |
68 | 69 | ``` 70 | @article{song2023totalrecon, 71 | title={Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis}, 72 | author={Song, Chonghyuk and Yang, Gengshan and Deng, Kangle and Zhu, Jun-Yan and Ramanan, Deva}, 73 | journal={arXiv}, 74 | year={2023} 75 | } 76 | ``` 77 |
78 | 79 | For training feed-forward video/image shape and pose estimators, cite: 80 |
81 | 82 | ``` 83 | @inproceedings{tan2023distilling, 84 | title={Distilling Neural Fields for Real-Time Articulated Shape Reconstruction}, 85 | author={Tan, Jeff and Yang, Gengshan and Ramanan, Deva}, 86 | booktitle={CVPR}, 87 | year={2023} 88 | } 89 | ``` 90 |
91 | 92 | For the human-48 dataset, cite: 93 | 94 |
95 | 96 | ``` 97 | @incollection{vlasic2008articulated, 98 | title={Articulated mesh animation from multi-view silhouettes}, 99 | author={Vlasic, Daniel and Baran, Ilya and Matusik, Wojciech and Popovi{\'c}, Jovan}, 100 | booktitle={Acm Siggraph 2008 papers}, 101 | pages={1--9}, 102 | year={2008} 103 | } 104 | @article{xu2018monoperfcap, 105 | title={Monoperfcap: Human performance capture from monocular video}, 106 | author={Xu, Weipeng and Chatterjee, Avishek and Zollh{\"o}fer, Michael and Rhodin, Helge and Mehta, Dushyant and Seidel, Hans-Peter and Theobalt, Christian}, 107 | journal={ACM Transactions on Graphics (ToG)}, 108 | volume={37}, 109 | number={2}, 110 | pages={1--15}, 111 | year={2018}, 112 | publisher={ACM New York, NY, USA} 113 | } 114 | @inproceedings{perazzi2016benchmark, 115 | title={A benchmark dataset and evaluation methodology for video object segmentation}, 116 | author={Perazzi, Federico and Pont-Tuset, Jordi and McWilliams, Brian and Van Gool, Luc and Gross, Markus and Sorkine-Hornung, Alexander}, 117 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, 118 | pages={724--732}, 119 | year={2016} 120 | } 121 | ``` 122 |
123 | -------------------------------------------------------------------------------- /browser/app.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # python browser/app.py 'database/processed/Annotations/Full-Resolution/cat-85-*/vis.mp4' 3 | # python browser/app.py 'logdir/dog-98-category-comp/renderings_00*/xyz.mp4' 4 | # or python browser/app.py and type in string 5 | from flask import Flask, render_template, request, send_from_directory 6 | import os 7 | import sys 8 | import glob 9 | 10 | app = Flask(__name__) 11 | 12 | 13 | def get_files(path): 14 | matched_files = sorted(glob.glob(path)) 15 | return matched_files 16 | 17 | 18 | @app.route("/", methods=["GET", "POST"]) 19 | def index(): 20 | files = [] 21 | if request.method == "POST": 22 | path = request.form.get("path") 23 | 24 | elif len(sys.argv) > 1: 25 | path = sys.argv[1] 26 | else: 27 | path = "" 28 | files = get_files(path) 29 | return render_template("index.html", files=files) 30 | 31 | 32 | @app.route("/logdir/", methods=["GET"]) 33 | def get_logdir_file(filename): 34 | return send_from_directory(os.getcwd(), filename) 35 | 36 | 37 | @app.route("/database/", methods=["GET"]) 38 | def get_database_file(filename): 39 | return send_from_directory(os.getcwd(), filename) 40 | 41 | 42 | if __name__ == "__main__": 43 | app.run(debug=True) 44 | -------------------------------------------------------------------------------- /browser/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Data Browser 6 | 73 | 74 | 75 | 76 |

Data Browser

77 |
78 |
79 | (e.g., 80 | database/processed/Annotations/Full-Resolution/cat*/vis.mp4) 81 |
82 | 83 |
84 |

Matched Files

85 | 100 | 101 | 102 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /database/vid_data/ama-bouncing-4v.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/8b1krs9hcyvk0z0/AAAG5wle5F98KERiDHUJilUMa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-bouncing.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/9a90bd0p6hacqiv/AADFZOFpdsFzpGiPQqvvCsVDa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-samba-4v.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/4b3dx6id0ncoyhe/AAAnKtpH8wirj0sazkdZCbEMa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-samba.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/uox4amcyd9g2gm9/AAA8XECVaSjqpgEgwTzIxc5da?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/car-turnaround-2.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/l7klay8bg54ryb8/AACeEmZq4aj6RXYUdY-UaZsua 2 | -------------------------------------------------------------------------------- /database/vid_data/car-turnaround.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/2saroz7jrnp1cy6/AACGHva9pJAIwQ6k8qgMs5Nma 2 | -------------------------------------------------------------------------------- /database/vid_data/cat-85.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/38f29ro8aq85enk/AAA5aSgBi4otuPrEiZRm1Ih5a 2 | -------------------------------------------------------------------------------- /database/vid_data/cat-pikachu-0.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/ls19bz5uo8juzoa/AAB0x4GUeH5PO97sB8Nak9eIa -------------------------------------------------------------------------------- /database/vid_data/cat-pikachu.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/bcm9150d3sy68ve/AADbQVnHFbBTvsfJLoa9AM9Ba -------------------------------------------------------------------------------- /database/vid_data/dog-98.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/85567m06uxoni42/AAASa1OnsXM2u8cxEiQSSF_Ia 2 | -------------------------------------------------------------------------------- /database/vid_data/dog-robolounge.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/66a2f1cfudj6ep3/AAAbJE0mzMMQdLruPnO16r8la 2 | -------------------------------------------------------------------------------- /database/vid_data/human-48.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/kv4xcntzcwfnmcv/AABqMAvjoTJw4U_8puObKBD9a 2 | -------------------------------------------------------------------------------- /database/vid_data/human-cap.txt: 
-------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/rl351jmtw9v5107/AACYWpO9M453NJr8ACViIeLfa 2 | -------------------------------------------------------------------------------- /database/vid_data/room.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/hkojyikow9jcd0g/AACA5-U75SQycUYbbx8bDdlUa 2 | -------------------------------------------------------------------------------- /database/vid_data/shiba-haru.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/u3j1ps2bcyubvs1/AACOrE2DiK-O2l74Q5Y4SlNQa 2 | -------------------------------------------------------------------------------- /database/vid_data/squirrel-baseball.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/wur870xkv01hv32/AADsB0zeCGWyUy4czQX5jCMCa 2 | -------------------------------------------------------------------------------- /database/vid_data/squirrel.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/1ktr3qnqwdysyvi/AAAhIRpzWB58KmCJvXu4agd_a 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /source/api_docs/lab4d* 3 | /source/_static/meshes/*.glb 4 | /source/_static/media/* -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | To develop locally, start Live Server and forward the port to your local browser 2 | 3 | To generate the necessary files: 4 | ``` 5 | sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/ 6 | python source/obj2glb.py 7 | ``` 8 | 9 | To rebuild the webpage: 10 | ```make clean; make html; mv build/html build/lab4d``` -------------------------------------------------------------------------------- /docs/env_min.yml: -------------------------------------------------------------------------------- 1 | name: lab4d 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - pip 7 | - ninja 8 | - pytorch 9 | - torchvision 10 | - cpuonly 11 | - matplotlib 12 | - absl-py 13 | - tensorboard 14 | - trimesh 15 | - scikit-image 16 | - opencv 17 | - einops 18 | - scikit-learn 19 | - imageio=2.14.1 20 | - imageio-ffmpeg 21 | - pip: 22 | - pysdf 23 | - sphinx==6.2.1 24 | - sphinx-copybutton 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/images/camera_annot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/camera_annot.png -------------------------------------------------------------------------------- /docs/source/_static/images/visflo-00081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/visflo-00081.jpg -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2-anno.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-anno.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2-proxy.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-proxy.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-85.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4 
-------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-7_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/comp_elev.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/comp_elev.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-98.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-robolounge_collage.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-robolounge_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-reanimate-8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8.mp4 -------------------------------------------------------------------------------- 
/docs/source/_static/media_resized/human-48.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin-1-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-1-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin-2_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-2_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-6.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7.mp4 
-------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel_collage.mp4 -------------------------------------------------------------------------------- /docs/source/api_docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Lab4D documentation master file, created by 2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Lab4D's documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | lab4d.dataloader 13 | lab4d.engine 14 | lab4d.nnutils 15 | lab4d.utils 16 | 17 | .. Indices and tables 18 | .. ================== 19 | 20 | .. * :ref:`genindex` 21 | .. * :ref:`modindex` 22 | .. * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/source/api_docs/modules.rst: -------------------------------------------------------------------------------- 1 | lab4d 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lab4d 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "Lab4D" 10 | copyright = "2023, Gengshan Yang, Jeff Tan, Alex Lyons, Neehar Peri, Deva Ramanan, Carnegie Mellon University" 11 | release = "0.0.0" 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | import sys, os 17 | 18 | # Path to lab4d 19 | sys.path.insert( 20 | 0, 21 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 22 | ) 23 | 24 | # Allow auto-generated docs from Google format docstrings 25 | extensions = [ 26 | "sphinx.ext.autodoc", 27 | "sphinx.ext.napoleon", 28 | "sphinx.ext.intersphinx", 29 | "sphinx_copybutton", 30 | ] 31 | 32 | # other packages 33 | intersphinx_mapping = { 34 | "python": ("https://docs.python.org/3", None), 35 | "pytorch": ("https://pytorch.org/docs/stable/", None), 36 | } 37 | 38 | # Allow documentation of multiple return types 39 | napoleon_custom_sections = [("Returns", "params_style")] 40 | 41 | templates_path = ["_templates"] 42 | exclude_patterns = [] 43 | 44 | # Mocking the imports of modules that require cuda 45 | autodoc_mock_imports = ["_quaternion"] 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 49 | 50 | html_theme = "pytorch_sphinx_theme" 51 | html_theme_path = ["../pytorch_sphinx_theme"] 52 | html_static_path = ["_static"] 53 | -------------------------------------------------------------------------------- /docs/source/get_started/index.rst: -------------------------------------------------------------------------------- 1 | Get Started 2 | =================== 3 | 4 | Requirements 5 | ------------------------- 6 | 7 | - **Linux** machine with at least 1 GPU (we tested on 3090s) 8 | - **Conda** 9 | 10 | - Follow `this link `_ to install conda. 11 | 12 | - Recommended: use mamba for package management (more efficient than conda). Install mamba with:: 13 | 14 | conda install -c conda-forge mamba -y 15 | 16 | - For developers: use `VS Code `_ with Black Formatter. 17 | 18 | Set up the environment 19 | ------------------------- 20 | 21 | Clone the repository and create a conda environment with the required packages:: 22 | 23 | git clone git@github.com:lab4d-org/lab4d.git --recursive 24 | 25 | cd lab4d 26 | 27 | mamba env create -f environment.yml 28 | 29 | conda activate lab4d 30 | 31 | bash scripts/install-deps.sh 32 | 33 | 34 | Running the Tutorial Code 35 | --------------------------------------------- 36 | See the `Tutorials page `_. 37 | 38 | 39 | .. .. Lab4D documentation master file, created by 40 | .. sphinx-quickstart on Fri Jun 2 20:54:08 2023. 41 | .. You can adapt this file completely to your liking, but it should at least 42 | .. contain the root `toctree` directive. 43 | 44 | .. Welcome to Lab4D's DOCUMENTATION! 45 | .. ================================= 46 | 47 | .. .. toctree:: 48 | .. :maxdepth: 2 49 | 50 | .. get_started 51 | 52 | .. .. Indices and tables 53 | .. .. ================== 54 | 55 | .. .. * :ref:`genindex` 56 | .. .. * :ref:`modindex` 57 | .. ..
* :ref:`search` 58 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Lab4D documentation master file, created by 2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Lab4D's documentation! 7 | ================================= 8 | 9 | **Lab4D** is a framework for 4D reconstruction from monocular videos. 10 | 11 | Features 12 | ------------------------------- 13 | - Representation 14 | 15 | - neural implicit representation 16 | 17 | - deformation fields (neural fields, control-points, skeleton) 18 | 19 | - compositional scene 20 | 21 | - category-level models 22 | 23 | - Interface for priors 24 | 25 | - pixelwise priors: depth, flow, DINOv2 features 26 | 27 | - segmentation: track-anything, video instance segmentation 28 | 29 | - camera viewpoint: viewpoint network, manual annotation 30 | 31 | - Efficiency 32 | 33 | - multi-gpu training 34 | 35 | - dual-quaternion ops 36 | 37 | .. note:: 38 | 39 | This is an alpha release and the APIs are subject to change as we continuously improve and refine the framework. 40 | We encourage users to provide feedback and report bugs via `github issues `_. 41 | Thank you for your support. 42 | -------------------------------------------------------------------------------- /docs/source/obj2glb.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import numpy as np 3 | import glob 4 | import os 5 | 6 | rootdir = os.path.dirname(__file__) 7 | 8 | for path in glob.glob("%s/_static/meshes/*.obj" % rootdir): 9 | print(path) 10 | m = trimesh.load(path, process=False) 11 | # cv coordinate to gl coordinate 12 | m.vertices = np.stack( 13 | [m.vertices[:, 0], -m.vertices[:, 1], -m.vertices[:, 2]], axis=1 14 | ) 15 | m.export(path.replace(".obj", ".glb")) 16 | -------------------------------------------------------------------------------- /docs/source/qa.rst: -------------------------------------------------------------------------------- 1 | Q&A 2 | =========================== 3 | 4 | Installation 5 | --------------------------- 6 | - Conda/mamba is not able to resolve conflicts when installing packages. 7 | 8 | - Possible cause: The base conda environment is not clean. See the discussion `in this thread `_. 9 | 10 | - Fix: Remove the packages in the base environment that cause the conflict. 11 | 12 | Data pre-processing 13 | --------------------------- 14 | - My Gradio app gets stuck at the loading screen. 15 | 16 | - Potential fix: kill the running VS Code processes and re-run the preprocessing code. 17 | 18 | Model training 19 | --------------------------- 20 | 21 | - How to change hyperparameters when using more videos (or video frames)? 22 | 23 | - You want to increase `pixels_per_image` and `imgs_per_gpu`, and use more GPUs. 24 | The number of sampled rays/pixels per minibatch is computed as the number of GPUs x `imgs_per_gpu` x `pixels_per_image`. 25 | Also see the note `here `__. 26 | 27 | - Training on >50 videos might cause the following OS error:: 28 | 29 | [Errno 24] Too many open files 30 | 31 | - To check the current file limit, run:: 32 | 33 | ulimit -S -n 34 | 35 | To increase the open file limit to 4096, run:: 36 | 37 | ulimit -n 4096 38 | 39 | - Multi-GPU training hangs but single-GPU training works fine.
40 | 
41 |   - Run the training script with `NCCL_P2P_DISABLE=1 bash scripts/train.sh ...` to disable direct GPU-to-GPU (P2P) communication. See discussion `here `__.
42 | 
--------------------------------------------------------------------------------
/docs/source/resize_vids.py:
--------------------------------------------------------------------------------
 1 | # python source/resize_vids.py
 2 | import os
 3 | import numpy as np
 4 | import imageio
 5 | from PIL import Image
 6 | 
 7 | src_dir = "source/_static/media"
 8 | dst_dir = "source/_static/media_resized/"
 9 | max_dim = 640 * 640
10 | video_exts = [".mp4", ".avi", ".mov", ".flv", ".mkv", ".wmv"]
11 | 
12 | # check for the destination directory and create it if it doesn't exist
13 | if not os.path.exists(dst_dir):
14 |     os.makedirs(dst_dir)
15 | 
16 | # iterate over video files in the source directory
17 | for filename in os.listdir(src_dir):
18 |     # check if the file is a video, ignoring the case of the extension
19 |     if any(filename.lower().endswith(ext) for ext in video_exts):
20 |         # add other conditions if there are other video formats
21 |         src_filepath = os.path.join(src_dir, filename)
22 |         dst_filepath = os.path.splitext(filename)[0] + ".mp4"
23 |         dst_filepath = os.path.join(dst_dir, dst_filepath)
24 | 
25 |         reader = imageio.get_reader(src_filepath)
26 |         fps = reader.get_meta_data()["fps"]
27 | 
28 |         # obtain video dimensions
29 |         first_frame = reader.get_data(0)
30 |         orig_height, orig_width = first_frame.shape[:2]
31 | 
32 |         # check if the resolution is greater than 640x640
33 |         if orig_height * orig_width > max_dim:
34 |             print("Resizing video: " + filename)
35 |             # resize while maintaining the aspect ratio
36 |             ratio = np.sqrt(max_dim / (orig_height * orig_width))
37 |             new_width = int(orig_width * ratio)
38 |             new_height = int(orig_height * ratio)
39 | 
40 |             writer = imageio.get_writer(dst_filepath, fps=fps)
41 | 
42 |             # iterate over frames in the video
43 |             for i, frame in enumerate(reader):
44 |                 frame = Image.fromarray(frame)
45 |                 frame = frame.resize((new_width, new_height), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
46 |                 writer.append_data(np.array(frame))
47 | 
48 |             writer.close()
49 |         else:
50 |             # copy the video to the destination directory
51 |             print("Copying video: " + filename)
52 |             os.system("cp " + src_filepath + " " + dst_filepath)
53 | 
54 | print("Video resizing is complete!")
55 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/index.rst:
--------------------------------------------------------------------------------
 1 | .. Lab4D documentation master file, created by
 2 |    sphinx-quickstart on Fri Jun  2 20:54:08 2023.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Tutorials
 7 | =================================
 8 | 
 9 | Overview
10 | ---------------------------------
11 | Inferring 4D representations given 2D observations is challenging due to its under-constrained nature.
12 | With recent advances in differentiable rendering, visual correspondence, and segmentation, we build an optimization framework that
13 | reconstructs dense 4D structures via test-time optimization, by minimizing the difference between the rendered 2D images and the input observations.
14 | 
15 | The tutorials introduce a complete workflow of Lab4D. We'll use the methods and datasets from the following papers:
16 | 
17 | - `BANMo: Building Animatable 3D Neural Models from Many Casual Videos `_, CVPR 2022.
18 | - `RAC: Reconstructing Animatable Categories from Videos `_, CVPR 2023.
19 | - `Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis `_, ICCV 2023.
20 | 
21 | `The tutorials assume a basic familiarity with Python and differentiable rendering concepts.`
22 | 
23 | Each tutorial can be executed in one of the following ways:
24 | 
25 | - **Custom videos**: This option allows you to train a model on your own videos.
26 | - **Preprocessed data**: This option skips the preprocessing step and trains models on the `preprocessed data `_ we provide.
27 | - **Render-only**: This option skips model training and allows you to render the `pre-trained model weights `_ we provide.
28 | 
29 | 
30 | Content
31 | ---------------------------------
32 | .. toctree::
33 |    :maxdepth: 1
34 | 
35 |    arbitrary_video
36 |    single_video_cat
37 |    multi_video_cat
38 |    category_model
39 |    preprocessing
40 | 
41 | .. Indices and tables
42 | .. ==================
43 | 
44 | .. * :ref:`genindex`
45 | .. * :ref:`modindex`
46 | .. * :ref:`search`
47 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/preprocessing.rst:
--------------------------------------------------------------------------------
 1 | 5. Pre-process custom videos
 2 | ========================================
 3 | 
 4 | In this tutorial, we show how to preprocess custom videos so they can later be used for training. We provide some
 5 | `raw videos `_ for you to try out.
 6 | The download links are provided as `database/vid_data/$seqname`, where `$seqname` is the name of the sequence.
 7 | 
 8 | Taking `cat-pikachu-0` from the `second tutorial `_ as an example,
 9 | run the following to download and process the data::
10 | 
11 |   # Args: sequence name, text prompt (segmentation), category from {human, quad, other} (camera viewpoint), gpu id
12 |   python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
13 | 
14 | .. note::
15 |    To preprocess other videos, create a folder named `database/raw/$seqname`, move the videos into it, and run the above command.
16 | 
17 | `Next, we will get into the details of processing.`
18 | 
19 | Frame filtering
20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21 | By default, we first remove near-static frames (i.e., frames without motion or with only small motion), since they do not provide useful extra signal for reconstruction.
22 | To do so, we run optical flow over consecutive frames and skip a frame if the median flow magnitude is smaller than a threshold.
23 | 
24 | .. note::
25 |    There is a flag in `scripts/run_preprocess.py` that turns frame filtering on or off. A minimal sketch of the skip rule is shown after the Segmentation section below.
26 | 
27 | Segmentation
28 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29 | 
30 | We provide a web GUI and a command line interface for object segmentation.
31 | 
32 | **Interactive segmentation**: `Track-Anything `_ will be used given the text prompt "other", e.g.,::
33 | 
34 |   python scripts/run_preprocess.py cat-pikachu-0 other quad "0"
35 | 
36 | It launches a web interface and asks the user to specify point prompts on the object of interest.
37 | 
38 | 
39 | **Automatic segmentation**: `Grounding-DINO `_ will be used to determine which object to track
40 | in the first frame given a valid text prompt, e.g.,::
41 | 
42 |   python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
43 | 
44 | 
45 | .. note::
46 | 
47 |    There is a flag in `scripts/run_preprocess.py` that switches the segmentation method.
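As a minimal sketch of the frame-filtering rule described above (the function name and the
threshold value are illustrative; the real logic is part of the preprocessing scripts under
`preprocess/third_party/vcnplus/`)::

   import numpy as np

   def keep_frame(flow_uv, threshold=1.0):
       # flow_uv: (H, W, 2) optical flow to the previously kept frame, in pixels
       magnitude = np.linalg.norm(flow_uv, axis=-1)
       # skip near-static frames: keep the frame only if the median flow is large enough
       return np.median(magnitude) >= threshold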
48 | 49 | 50 | Object-to-camera transformations 51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 52 | 53 | For human and quadruped animals, we use a viewpoint network (presented in BANMo) to estimate the camera viewpoint / rotation with regard to a canonical 3D coordinate. 54 | 55 | For other categories, user will be asked to annotate camera viewpoints (by aligning the orientation of a reference 3D model to the input image) for a few frames as shown below. 56 | 57 | .. raw:: html 58 | 59 |
60 |    <!-- embedded demo of the camera viewpoint annotation interface -->
61 | 
62 | 63 | .. note:: 64 | 65 | To align the 3D model with the provided image, utilize the sidebar to specify the camera's roll, elevation, and azimuth angles. After adjusting each frame, ensure you click 'save.' Once you've completed adjustments for all the videos, click 'exit.' 66 | We suggest making an annotation every time the object turns 90 degrees, such as when it changes from a front-facing position to facing left. 67 | 68 | In the `scripts/run_preprocess.py` file, there's a flag that allows you to change the method used for camera estimation." 69 | 70 | After getting the sparse annotations, we run camera registration that propogates the rotation annotations using optical flow and monocular depth. 71 | Camera translations are approximated with 2D object center and size (from segmentation) assuming a orthographic camera model. 72 | 73 | 74 | Parallelizing the pre-processing 75 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 76 | 77 | Preprocessing 10 videos takes about 90 minutes on a single device. To speed up the pre-processing, 78 | we can parallelize tasks over multiple gpus with the following:: 79 | 80 | # Args: sequence name, text prompt for segmentation, category from {human, quad, other} for camera viewpoint, gpu id 81 | python scripts/run_preprocess.py cat-pikachu animal quad "0,1,2,3" 82 | 83 | 84 | Visit other `tutorials `_. -------------------------------------------------------------------------------- /docs/template/module.rst_t: -------------------------------------------------------------------------------- 1 | {%- if show_headings %} 2 | {{- [basename, "module"] | join(' ') | e | heading }} 3 | 4 | {% endif -%} 5 | .. automodule:: {{ qualname }} 6 | {%- for option in automodule_options %} 7 | :{{ option }}: 8 | {%- endfor %} 9 | 10 | -------------------------------------------------------------------------------- /docs/template/package.rst_t: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- [pkgname, "namespace"] | join(" ") | e | heading }} 18 | {% else %} 19 | {{- [pkgname, "package"] | join(" ") | e | heading }} 20 | {% endif %} 21 | 22 | {%- if is_namespace %} 23 | .. py:module:: {{ pkgname }} 24 | {% endif %} 25 | 26 | {%- if modulefirst and not is_namespace %} 27 | {{ automodule(pkgname, automodule_options) }} 28 | {% endif %} 29 | 30 | {%- if subpackages %} 31 | Subpackages 32 | ----------- 33 | 34 | {{ toctree(subpackages) }} 35 | {% endif %} 36 | 37 | {%- if submodules %} 38 | {% if separatemodules %} 39 | {{ toctree(submodules) }} 40 | {% else %} 41 | {%- for submodule in submodules %} 42 | {% if show_headings %} 43 | {{- submodule | e | heading(2) }} 44 | {% endif %} 45 | {{ automodule(submodule, automodule_options) }} 46 | {% endfor %} 47 | {%- endif %} 48 | {%- endif %} -------------------------------------------------------------------------------- /docs/template/toc.rst_t: -------------------------------------------------------------------------------- 1 | {{ header | heading }} 2 | 3 | .. 
toctree:: 4 | :maxdepth: {{ maxdepth }} 5 | {% for docname in docnames %} 6 | {{ docname }} 7 | {%- endfor %} 8 | 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lab4d 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - python=3.9 8 | - setuptools=66.0.0 9 | - pip 10 | - pytorch==2.0.0=py3.9_cuda11.7_cudnn8.5.0_0 11 | - torchvision 12 | - cudatoolkit-dev=11.7 13 | - gcc_linux-64=10 14 | - gxx_linux-64=10 15 | - matplotlib 16 | - ninja 17 | - absl-py 18 | - tensorboard 19 | - trimesh 20 | - scikit-image 21 | - opencv 22 | - einops 23 | - numba 24 | - gdown 25 | - scikit-learn 26 | - psutil 27 | - av 28 | - plotly 29 | - imageio 30 | - imageio-ffmpeg 31 | - tqdm 32 | - pip: 33 | - pysdf 34 | - gradio==3.49.0 35 | - timm==0.6.7 36 | - detectron2 @ git+https://github.com/facebookresearch/detectron2.git@e9f7e2b 37 | - segment_anything @ git+https://github.com/facebookresearch/segment-anything.git 38 | - groundingdino @ git+https://github.com/IDEA-Research/GroundingDINO.git 39 | - openmim 40 | - pyrender 41 | -------------------------------------------------------------------------------- /lab4d/__init__.py: -------------------------------------------------------------------------------- 1 | # Decorate all modules with @record_function and @record_class 2 | import lab4d.dataloader.data_utils 3 | import lab4d.dataloader.vidloader 4 | import lab4d.engine.model 5 | import lab4d.engine.train_utils 6 | import lab4d.engine.trainer 7 | import lab4d.nnutils.appearance 8 | import lab4d.nnutils.base 9 | import lab4d.nnutils.deformable 10 | import lab4d.nnutils.embedding 11 | import lab4d.nnutils.feature 12 | import lab4d.nnutils.intrinsics 13 | import lab4d.nnutils.multifields 14 | import lab4d.nnutils.nerf 15 | import lab4d.nnutils.pose 16 | import lab4d.nnutils.skinning 17 | import lab4d.nnutils.time 18 | import lab4d.nnutils.visibility 19 | import lab4d.nnutils.warping 20 | import lab4d.utils.cam_utils 21 | import lab4d.utils.camera_utils 22 | import lab4d.utils.geom_utils 23 | import lab4d.utils.io 24 | import lab4d.utils.loss_utils 25 | import lab4d.utils.numpy_utils 26 | import lab4d.utils.quat_transform 27 | import lab4d.utils.render_utils 28 | import lab4d.utils.skel_utils 29 | import lab4d.utils.torch_utils 30 | import lab4d.utils.transforms 31 | import lab4d.utils.vis_utils 32 | from lab4d.utils.profile_utils import decorate_module 33 | 34 | decorate_module(lab4d.dataloader.data_utils) 35 | decorate_module(lab4d.dataloader.vidloader) 36 | decorate_module(lab4d.engine.model) 37 | decorate_module(lab4d.engine.trainer) 38 | decorate_module(lab4d.engine.train_utils) 39 | decorate_module(lab4d.nnutils.appearance) 40 | decorate_module(lab4d.nnutils.base) 41 | decorate_module(lab4d.nnutils.deformable) 42 | decorate_module(lab4d.nnutils.embedding) 43 | decorate_module(lab4d.nnutils.feature) 44 | decorate_module(lab4d.nnutils.intrinsics) 45 | decorate_module(lab4d.nnutils.multifields) 46 | decorate_module(lab4d.nnutils.nerf) 47 | decorate_module(lab4d.nnutils.pose) 48 | decorate_module(lab4d.nnutils.skinning) 49 | decorate_module(lab4d.nnutils.time) 50 | decorate_module(lab4d.nnutils.visibility) 51 | decorate_module(lab4d.nnutils.warping) 52 | decorate_module(lab4d.utils.camera_utils) 53 | decorate_module(lab4d.utils.cam_utils) 54 | decorate_module(lab4d.utils.geom_utils) 55 | decorate_module(lab4d.utils.io) 56 | 
decorate_module(lab4d.utils.loss_utils) 57 | decorate_module(lab4d.utils.numpy_utils) 58 | decorate_module(lab4d.utils.quat_transform) 59 | decorate_module(lab4d.utils.render_utils) 60 | decorate_module(lab4d.utils.skel_utils) 61 | decorate_module(lab4d.utils.torch_utils) 62 | decorate_module(lab4d.utils.transforms) 63 | decorate_module(lab4d.utils.vis_utils) 64 | -------------------------------------------------------------------------------- /lab4d/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import os 3 | 4 | from absl import flags 5 | 6 | opts = flags.FLAGS 7 | 8 | 9 | class TrainModelConfig: 10 | # weights of reconstruction terms 11 | flags.DEFINE_float("mask_wt", 0.1, "weight for silhouette loss") 12 | flags.DEFINE_float("rgb_wt", 0.1, "weight for color loss") 13 | flags.DEFINE_float("depth_wt", 1e-4, "weight for depth loss") 14 | flags.DEFINE_float("flow_wt", 0.5, "weight for flow loss") 15 | flags.DEFINE_float("vis_wt", 1e-2, "weight for visibility loss") 16 | flags.DEFINE_float("feature_wt", 1e-2, "weight for feature reconstruction loss") 17 | flags.DEFINE_float("feat_reproj_wt", 5e-2, "weight for feature reprojection loss") 18 | 19 | # weights of regularization terms 20 | flags.DEFINE_float( 21 | "reg_visibility_wt", 1e-4, "weight for visibility regularization" 22 | ) 23 | flags.DEFINE_float("reg_eikonal_wt", 1e-3, "weight for eikonal regularization") 24 | flags.DEFINE_float( 25 | "reg_deform_cyc_wt", 0.01, "weight for deform cyc regularization" 26 | ) 27 | flags.DEFINE_float("reg_delta_skin_wt", 5e-3, "weight for delta skinning reg") 28 | flags.DEFINE_float("reg_skin_entropy_wt", 5e-4, "weight for delta skinning reg") 29 | flags.DEFINE_float( 30 | "reg_gauss_skin_wt", 1e-3, "weight for gauss skinning consistency" 31 | ) 32 | flags.DEFINE_float("reg_cam_prior_wt", 0.1, "weight for camera regularization") 33 | flags.DEFINE_float("reg_skel_prior_wt", 0.1, "weight for skeleton regularization") 34 | flags.DEFINE_float( 35 | "reg_gauss_mask_wt", 0.01, "weight for gauss mask regularization" 36 | ) 37 | flags.DEFINE_float("reg_soft_deform_wt", 100.0, "weight for soft deformation reg") 38 | 39 | # model 40 | flags.DEFINE_string("field_type", "fg", "{bg, fg, comp}") 41 | flags.DEFINE_string( 42 | "fg_motion", "rigid", "{rigid, dense, bob, skel-human, skel-quad}" 43 | ) 44 | flags.DEFINE_bool("single_inst", True, "assume the same morphology over objs") 45 | 46 | 47 | class TrainOptConfig: 48 | # io-related 49 | flags.DEFINE_string("seqname", "cat", "name of the sequence") 50 | flags.DEFINE_string("logname", "tmp", "name of the saved log") 51 | flags.DEFINE_string( 52 | "data_prefix", "crop", "prefix of the data entries, {crop, full}" 53 | ) 54 | flags.DEFINE_integer("train_res", 256, "size of training images") 55 | flags.DEFINE_string("logroot", "logdir/", "root directory for log files") 56 | flags.DEFINE_string("load_suffix", "", "sufix of params, {latest, 0, 10, ...}") 57 | flags.DEFINE_string("feature_type", "dinov2", "{dinov2, cse}") 58 | flags.DEFINE_string("load_path", "", "path to load pretrained model") 59 | 60 | # accuracy-related 61 | flags.DEFINE_float("learning_rate", 5e-4, "learning rate") 62 | flags.DEFINE_integer("num_rounds", 20, "number of rounds to train") 63 | flags.DEFINE_integer("iters_per_round", 200, "number of iterations per round") 64 | flags.DEFINE_integer("imgs_per_gpu", 128, "images samples per iter, per gpu") 65 | 
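    # Note: the number of rays / pixels sampled per optimization step is
    # ngpu x imgs_per_gpu x pixels_per_image (see docs/source/qa.rst),
    # e.g. 1 x 128 x 16 = 2048 rays with the defaults here.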
flags.DEFINE_integer("pixels_per_image", 16, "pixel samples per image") 66 | # flags.DEFINE_integer("imgs_per_gpu", 1, "size of minibatches per iter") 67 | # flags.DEFINE_integer("pixels_per_image", 4096, "number of pixel samples per image") 68 | flags.DEFINE_boolean( 69 | "freeze_bone_len", False, "do not change bone length of skeleton" 70 | ) 71 | flags.DEFINE_boolean( 72 | "reset_steps", 73 | True, 74 | "reset steps of loss scheduling, set to False if resuming training", 75 | ) 76 | 77 | # efficiency-related 78 | flags.DEFINE_integer("ngpu", 1, "number of gpus to use") 79 | flags.DEFINE_integer("num_workers", 2, "Number of workers for dataloading") 80 | flags.DEFINE_integer("eval_res", 64, "size used for eval visualizations") 81 | flags.DEFINE_integer("save_freq", 10, "params saving frequency") 82 | flags.DEFINE_boolean("profile", False, "profile the training loop") 83 | 84 | 85 | def get_config(): 86 | return opts.flag_values_dict() 87 | 88 | 89 | def save_config(): 90 | save_dir = os.path.join(opts.logroot, "%s-%s" % (opts.seqname, opts.logname)) 91 | os.makedirs(save_dir, exist_ok=True) 92 | opts_path = os.path.join(save_dir, "opts.log") 93 | if os.path.exists(opts_path): 94 | os.remove(opts_path) 95 | opts.append_flags_into_file(opts_path) 96 | -------------------------------------------------------------------------------- /lab4d/config_omega.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | from omegaconf import DictConfig, OmegaConf 3 | 4 | # Define the hierarchical configuration using a dictionary 5 | config = DictConfig( 6 | { 7 | "train": { 8 | "weights": { 9 | "recon": { 10 | "mask_wt": 0.1, # weight for silhouette loss 11 | "rgb_wt": 0.1, # weight for color loss 12 | "depth_wt": 0.01, # weight for depth loss 13 | "flow_wt": 0.5, # weight for flow loss 14 | "vis_wt": 0.01, # weight for visibility loss 15 | "feature_wt": 0.01, # weight for feature reconstruction loss 16 | "feat_reproj_wt": 0.05, # weight for feature reprojection loss 17 | }, 18 | "reg": { 19 | "visibility_wt": 1e-3, # weight for visibility regularization 20 | "eikonal_wt": 1e-5, # weight for eikonal regularization 21 | "deform_cyc_wt": 0.01, # weight for deform cyc regularization 22 | "gauss_skin_wt": 1, # weight for gauss skinning consistency 23 | }, 24 | }, 25 | "model": { 26 | "field_type": "bg", # {bg, fg, comp} 27 | "fg_motion": "rigid", # {rigid, dense, bob, skel} 28 | "single_inst": True, # assume the same morphology over objs 29 | }, 30 | "io": { 31 | "seqname": "cat", # name of the sequence 32 | "logname": "tmp", # name of the saved log 33 | "data_prefix": "full", # prefix of the data entries 34 | "train_res": 256, # size of training images 35 | "logroot": "logdir/", # root directory for log files 36 | "load_suffix": "", # sufix of params, {latest, 0, 10, ...} 37 | "save_freq": 10, # params saving frequency 38 | }, 39 | "optim": { 40 | "learning_rate": 5e-4, # learning rate 41 | "num_rounds": 20, # number of rounds to trainn 42 | "iters_per_round": 200, # number of iterations per round 43 | "imgs_per_gpu": 128, # images samples per iter, per gpu 44 | "pixels_per_image": 16, # pixel samples per image 45 | "ngpu": 1, # number of gpus to use 46 | "num_workers": 2, # number of workers for dataloading 47 | }, 48 | "eval_res": 64, # size used for eval visualizations 49 | "profile": False, # profile the training loop 50 | }, 51 | } 52 | ) 53 | 54 | 55 | def get_config(): 56 | return 
opts.flag_values_dict() 57 | 58 | 59 | def save_config(): 60 | save_dir = os.path.join(opts.logroot, opts.logname) 61 | os.makedirs(save_dir, exist_ok=True) 62 | opts_path = os.path.join(save_dir, "opts.log") 63 | if os.path.exists(opts_path): 64 | os.remove(opts_path) 65 | opts.append_flags_into_file(opts_path) 66 | 67 | 68 | # # Convert the configuration to a dictionary 69 | # config_dict = OmegaConf.to_container(config) 70 | 71 | # # Convert the dictionary back to a configuration 72 | # config2 = OmegaConf.create(config_dict) 73 | -------------------------------------------------------------------------------- /lab4d/dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/dataloader/__init__.py -------------------------------------------------------------------------------- /lab4d/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/engine/__init__.py -------------------------------------------------------------------------------- /lab4d/engine/train_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import os 3 | 4 | import torch 5 | 6 | 7 | def get_local_rank(): 8 | try: 9 | return int(os.environ["LOCAL_RANK"]) 10 | except: 11 | print("LOCAL_RANK not found, set to 0") 12 | return 0 13 | 14 | 15 | class DataParallelPassthrough(torch.nn.parallel.DistributedDataParallel): 16 | """For multi-GPU access, forward attributes to the inner module.""" 17 | 18 | def __getattr__(self, name): 19 | try: 20 | return super().__getattr__(name) 21 | except AttributeError: 22 | return getattr(self.module, name) 23 | 24 | def __delattr__(self, name): 25 | try: 26 | return super().__delattr__(name) 27 | except AttributeError: 28 | return delattr(self.module, name) 29 | -------------------------------------------------------------------------------- /lab4d/nnutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/nnutils/__init__.py -------------------------------------------------------------------------------- /lab4d/nnutils/appearance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | import torch.nn as nn 4 | 5 | from lab4d.nnutils.time import TimeMLP 6 | 7 | 8 | class AppearanceEmbedding(TimeMLP): 9 | """Encode global appearance code over time with an MLP 10 | 11 | Args: 12 | frame_info (Dict): Metadata about the frames in a dataset 13 | appr_channels (int): Number of channels in appearance codes 14 | D (int): Number of linear layers 15 | W (int): Number of hidden units in each MLP layer 16 | num_freq_t (int): Number of frequencies in the time embedding 17 | skips (List(int)): List of layers to add skip connections at 18 | activation (Function): Activation function to use (e.g. 
nn.ReLU()) 19 | """ 20 | 21 | def __init__( 22 | self, 23 | frame_info, 24 | appr_channels, 25 | D=2, 26 | W=64, 27 | num_freq_t=6, 28 | skips=[], 29 | activation=nn.ReLU(True), 30 | time_scale=0.1, 31 | ): 32 | self.appr_channels = appr_channels 33 | # xyz encoding layers 34 | super().__init__( 35 | frame_info, 36 | D=D, 37 | W=W, 38 | num_freq_t=num_freq_t, 39 | skips=skips, 40 | activation=activation, 41 | time_scale=time_scale, 42 | ) 43 | 44 | # output layers 45 | self.output = nn.Linear(W, appr_channels) 46 | 47 | def forward(self, t_embed): 48 | """ 49 | Args: 50 | t: (..., self.W) Input time embeddings 51 | Returns: 52 | out: (..., appr_channels) Output appearance codes 53 | """ 54 | t_feat = super().forward(t_embed) 55 | out = self.output(t_feat) 56 | return out 57 | -------------------------------------------------------------------------------- /lab4d/nnutils/intrinsics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | 6 | from lab4d.nnutils.time import TimeMLP 7 | 8 | 9 | class IntrinsicsMLP(TimeMLP): 10 | """Encode camera intrinsics over time with an MLP 11 | 12 | Args: 13 | intrinsics: (N,4) Camera intrinsics (fx, fy, cx, cy) 14 | frame_info (Dict): Metadata about the frames in a dataset 15 | D (int): Number of linear layers 16 | W (int): Number of hidden units in each MLP layer 17 | num_freq_t (int): Number of frequencies in the time embedding 18 | skips (List(int)): List of layers to add skip connections at 19 | activation (Function): Activation function to use (e.g. nn.ReLU()) 20 | time_scale (float): Control the sensitivity to time by scaling. 21 | Lower values make the module less sensitive to time. 
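    Note: `get_vals` returns per-frame intrinsics in the same (fx, fy, cx, cy) convention
    as the input. The MLP predicts a time-dependent multiplicative correction to a per-video
    base focal length, fx and fy are averaged to keep pixels square, and the principal point
    comes from a per-video parameter (initialized from the given intrinsics) that does not
    vary over time.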
22 | """ 23 | 24 | def __init__( 25 | self, 26 | intrinsics, 27 | frame_info=None, 28 | D=5, 29 | W=256, 30 | num_freq_t=0, 31 | skips=[], 32 | activation=nn.ReLU(True), 33 | time_scale=0.1, 34 | ): 35 | if frame_info is None: 36 | num_frames = len(intrinsics) 37 | frame_info = { 38 | "frame_offset": np.asarray([0, num_frames]), 39 | "frame_mapping": list(range(num_frames)), 40 | "frame_offset_raw": np.asarray([0, num_frames]), 41 | } 42 | # xyz encoding layers 43 | super().__init__( 44 | frame_info, 45 | D=D, 46 | W=W, 47 | num_freq_t=num_freq_t, 48 | skips=skips, 49 | activation=activation, 50 | time_scale=time_scale, 51 | ) 52 | 53 | # output layers 54 | self.focal = nn.Sequential( 55 | nn.Linear(W, W // 2), 56 | activation, 57 | nn.Linear(W // 2, 2), 58 | ) 59 | 60 | # camera intrinsics: fx,fy,px,py 61 | self.base_logfocal = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2)) 62 | self.base_ppoint = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2)) 63 | self.register_buffer( 64 | "init_vals", torch.tensor(intrinsics, dtype=torch.float32), persistent=False 65 | ) 66 | 67 | def mlp_init(self): 68 | """Initialize camera intrinsics from external values""" 69 | intrinsics = self.init_vals 70 | frame_offset = self.get_frame_offset() 71 | self.base_logfocal.data = intrinsics[frame_offset[:-1], :2].log() 72 | self.base_ppoint.data = intrinsics[frame_offset[:-1], 2:] 73 | super().mlp_init(termination_loss=1.0) 74 | 75 | def forward(self, t_embed): 76 | """ 77 | Args: 78 | t_embed: (..., self.W) Input Fourier time embeddings 79 | Returns: 80 | out: (..., 4) Camera intrinsics 81 | """ 82 | t_feat = super().forward(t_embed) 83 | focal = self.focal(t_feat).exp() 84 | return focal 85 | 86 | def get_vals(self, frame_id=None): 87 | """Compute camera intrinsics at the given frames. 88 | 89 | Args: 90 | frame_id: (...,) Frame id. If None, compute at all frames 91 | Returns: 92 | intrinsics: (..., 4) Output camera intrinsics 93 | """ 94 | t_embed = self.time_embedding(frame_id) 95 | focal = self.forward(t_embed) 96 | if frame_id is None: 97 | inst_id = self.time_embedding.frame_to_vid 98 | else: 99 | inst_id = self.time_embedding.raw_fid_to_vid[frame_id] 100 | base_focal = self.base_logfocal[inst_id].exp() 101 | base_ppoint = self.base_ppoint[inst_id] 102 | focal = focal * base_focal 103 | # force square pixels 104 | focal[..., :] = (focal + focal.flip(-1)) / 2 105 | ppoint = base_ppoint.expand_as(focal) 106 | intrinsics = torch.cat([focal, ppoint], dim=-1) 107 | return intrinsics 108 | -------------------------------------------------------------------------------- /lab4d/nnutils/time.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from lab4d.nnutils.base import BaseMLP 8 | from lab4d.nnutils.embedding import PosEmbedding, TimeEmbedding, get_fourier_embed_dim 9 | 10 | 11 | class TimeMLP(BaseMLP): 12 | """MLP that encodes a quantity over time. 13 | 14 | Args: 15 | frame_info (Dict): Metadata about the frames in a dataset 16 | D (int): Number of linear layers 17 | W (int): Number of hidden units in each MLP layer 18 | num_freq_t (int): Number of frequencies in the time embedding 19 | skips (List(int)): List of layers to add skip connections at 20 | activation (Function): Activation function to use (e.g. 
nn.ReLU()) 21 | time_scale (float): Control the sensitivity to time by scaling. 22 | Lower values make the module less sensitive to time. 23 | """ 24 | 25 | def __init__( 26 | self, 27 | frame_info, 28 | D=5, 29 | W=256, 30 | num_freq_t=6, 31 | skips=[], 32 | activation=nn.ReLU(True), 33 | time_scale=1.0, 34 | ): 35 | frame_offset = frame_info["frame_offset"] 36 | # frame_offset_raw = frame_info["frame_offset_raw"] 37 | if num_freq_t > 0: 38 | max_ts = (frame_offset[1:] - frame_offset[:-1]).max() 39 | # scale according to input frequency: num_frames = 64 -> freq = 6 40 | num_freq_t = np.log2(max_ts / 64) + num_freq_t 41 | # # scale according to input frequency: num_frames = 512 -> freq = 6 42 | # num_freq_t = np.log2(max_ts / 512) + num_freq_t 43 | num_freq_t = int(np.rint(num_freq_t)) 44 | # print("max video len: %d, override num_freq_t to %d" % (max_ts, num_freq_t)) 45 | 46 | super().__init__( 47 | D=D, 48 | W=W, 49 | in_channels=W, 50 | out_channels=W, 51 | skips=skips, 52 | activation=activation, 53 | final_act=True, 54 | ) 55 | 56 | self.time_embedding = TimeEmbedding( 57 | num_freq_t, frame_info, out_channels=W, time_scale=time_scale 58 | ) 59 | 60 | def loss_fn(y): 61 | x = self.get_vals() 62 | return F.mse_loss(x, y) 63 | 64 | self.loss_fn = loss_fn 65 | 66 | def forward(self, t_embed): 67 | """ 68 | Args: 69 | t_embed: (..., self.W) Time Fourier embeddings 70 | Returns: 71 | out: (..., self.W) Time-dependent features 72 | """ 73 | t_feat = super().forward(t_embed) 74 | return t_feat 75 | 76 | def mlp_init(self, loss_fn=None, termination_loss=0.0001): 77 | """Initialize the time embedding MLP to match external priors. 78 | `self.init_vals` is defined by the child class, and could be 79 | (nframes, 4, 4) camera poses or (nframes, 4) camera intrinsics 80 | """ 81 | if loss_fn is None: 82 | loss_fn = self.loss_fn 83 | 84 | optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) 85 | 86 | i = 0 87 | while True: 88 | optimizer.zero_grad() 89 | loss = loss_fn(self.init_vals) 90 | loss.backward() 91 | optimizer.step() 92 | if i % 100 == 0: 93 | print(f"iter: {i}, loss: {loss.item():.4f}") 94 | i += 1 95 | if loss < termination_loss: 96 | break 97 | 98 | def compute_distance_to_prior(self): 99 | """Compute L2-distance from current SE(3) / intrinsics values to 100 | external priors. 101 | 102 | Returns: 103 | loss (0,): Mean squared error to priors 104 | """ 105 | return self.loss_fn(self.init_vals) 106 | 107 | def get_vals(self, frame_id=None): 108 | """Compute values at the given frames. 109 | 110 | Args: 111 | frame_id: (...,) Frame id. If None, evaluate at all frames 112 | Returns: 113 | pred: Predicted outputs 114 | """ 115 | t_embed = self.time_embedding(frame_id) 116 | pred = self.forward(t_embed) 117 | return pred 118 | 119 | def get_mean_vals(self): 120 | """Compute the mean embedding over all frames""" 121 | device = self.parameters().__next__().device 122 | t_embed = self.time_embedding.get_mean_embedding(device) 123 | pred = self.forward(t_embed) 124 | return pred 125 | 126 | def get_frame_offset(self): 127 | """Return the number of frames before the first frame of each video""" 128 | return self.time_embedding.frame_offset 129 | -------------------------------------------------------------------------------- /lab4d/nnutils/visibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
2 | import torch 3 | from torch import nn 4 | 5 | from lab4d.nnutils.base import CondMLP 6 | from lab4d.nnutils.embedding import PosEmbedding 7 | 8 | 9 | class VisField(nn.Module): 10 | """Predict a visibility score (-inf to +inf) for all 3D points 11 | 12 | Args: 13 | num_inst (int): Number of distinct object instances. If --nosingle_inst 14 | is passed, this is equal to the number of videos, as we assume each 15 | video captures a different instance. Otherwise, we assume all videos 16 | capture the same instance and set this to 1. 17 | D (int): Number of linear layers 18 | W (int): Number of hidden units in each MLP layer 19 | num_freq_xyz (int): Number of frequencies in position embedding 20 | inst_channels (int): Number of channels in the instance code 21 | skips (List(int)): List of layers to add skip connections at 22 | activation (Function): Activation function to use (e.g. nn.ReLU()) 23 | """ 24 | 25 | def __init__( 26 | self, 27 | num_inst, 28 | D=2, 29 | W=64, 30 | num_freq_xyz=10, 31 | inst_channels=32, 32 | skips=[4], 33 | activation=nn.ReLU(True), 34 | ): 35 | super().__init__() 36 | 37 | # position and direction embedding 38 | self.pos_embedding = PosEmbedding(3, num_freq_xyz) 39 | 40 | # xyz encoding layers 41 | self.basefield = CondMLP( 42 | num_inst=num_inst, 43 | D=D, 44 | W=W, 45 | in_channels=self.pos_embedding.out_channels, 46 | inst_channels=inst_channels, 47 | out_channels=1, 48 | skips=skips, 49 | activation=activation, 50 | final_act=False, 51 | ) 52 | 53 | def forward(self, xyz, inst_id=None): 54 | """ 55 | Args: 56 | xyz: (..., 3), xyz coordinates 57 | inst_id: (...,) instance id, or None to use the average instance 58 | Returns: 59 | out: (..., 1), visibility score 60 | """ 61 | xyz_embed = self.pos_embedding(xyz) 62 | visibility = self.basefield(xyz_embed, inst_id) 63 | return visibility 64 | -------------------------------------------------------------------------------- /lab4d/reanimate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
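# Reanimate a trained model: load per-frame joint angles (joint_so3), articulations
# (t_articulation) and root poses (field2cam) exported to fg-motion.json, and render
# the model driven by that motion, e.g.: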
2 | # python lab4d/reanimate.py --flagfile=logdir/human-48-dinov2-skel-e120/opts.log --load_suffix latest --motion_id 20 --inst_id 0 3 | 4 | import json 5 | import os 6 | import sys 7 | 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from absl import app, flags 12 | 13 | cwd = os.getcwd() 14 | if cwd not in sys.path: 15 | sys.path.insert(0, cwd) 16 | 17 | from lab4d.config import get_config 18 | from lab4d.render import construct_batch_from_opts, render 19 | from lab4d.utils.profile_utils import torch_profile 20 | from lab4d.utils.quat_transform import se3_to_quaternion_translation 21 | 22 | cudnn.benchmark = True 23 | 24 | 25 | class RenderFlags: 26 | """Flags for the renderer.""" 27 | 28 | flags.DEFINE_integer("motion_id", 0, "motion id") 29 | 30 | 31 | def construct_batch_from_opts_reanimate(opts, model, data_info): 32 | device = "cuda" 33 | # load motion data 34 | motion_path = "%s/%s-%s/export_%04d/fg-motion.json" % ( 35 | opts["logroot"], 36 | opts["seqname"], 37 | opts["logname"], 38 | opts["motion_id"], 39 | ) 40 | with open(motion_path, "r") as fp: 41 | motion_data = json.load(fp) 42 | t_articulation = np.asarray(motion_data["t_articulation"]) 43 | field2cam = np.asarray(motion_data["field2cam"]) 44 | 45 | opts["num_frames"] = len(t_articulation) 46 | 47 | # joint angles 48 | joint_so3 = np.asarray(motion_data["joint_so3"]) 49 | joint_so3 = torch.tensor(joint_so3, dtype=torch.float32, device=device) 50 | 51 | # root pose 52 | field2cam = torch.tensor(field2cam, dtype=torch.float32, device=device) 53 | field2cam = field2cam.reshape(-1, 4, 4) 54 | field2cam = se3_to_quaternion_translation(field2cam, tuple=False) 55 | 56 | batch, raw_size = construct_batch_from_opts(opts, model, data_info) 57 | 58 | batch["joint_so3"] = joint_so3 59 | batch["field2cam"] = {"fg": field2cam} 60 | return batch, raw_size 61 | 62 | 63 | def main(_): 64 | opts = get_config() 65 | render(opts, construct_batch_func=construct_batch_from_opts_reanimate) 66 | 67 | 68 | if __name__ == "__main__": 69 | app.run(main) 70 | -------------------------------------------------------------------------------- /lab4d/tests/hat_map.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | 4 | from lab4d.utils.geom_utils import hat_map, so3_to_exp_map 5 | 6 | 7 | @torch.jit.script 8 | def hat(v: torch.Tensor) -> torch.Tensor: 9 | """ 10 | Compute the Hat operator [1] of a batch of 3D vectors. 11 | 12 | Args: 13 | v: Batch of vectors of shape `(minibatch , 3)`. 14 | 15 | Returns: 16 | Batch of skew-symmetric matrices of shape 17 | `(minibatch, 3 , 3)` where each matrix is of the form: 18 | `[ 0 -v_z v_y ] 19 | [ v_z 0 -v_x ] 20 | [ -v_y v_x 0 ]` 21 | 22 | Raises: 23 | ValueError if `v` is of incorrect shape. 24 | 25 | [1] https://en.wikipedia.org/wiki/Hat_operator 26 | """ 27 | 28 | N, dim = v.shape 29 | if dim != 3: 30 | raise ValueError("Input vectors have to be 3-dimensional.") 31 | 32 | h = torch.zeros((N, 3, 3), dtype=v.dtype, device=v.device) 33 | 34 | x, y, z = v.unbind(1) 35 | 36 | h[:, 0, 1] = -z 37 | h[:, 0, 2] = y 38 | h[:, 1, 0] = z 39 | h[:, 1, 2] = -x 40 | h[:, 2, 0] = -y 41 | h[:, 2, 1] = x 42 | 43 | return h 44 | 45 | 46 | def so3_exp_map(log_rot, eps=0.0001): 47 | """ 48 | A helper function that computes the so3 exponential map and, 49 | apart from the rotation matrix, also returns intermediate variables 50 | that can be re-used in other functions. 
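    The returned rotation follows Rodrigues' formula: with w = log_rot,
    t = ||w|| (clamped away from zero by `eps`) and K = hat(w),

        R = I + (sin(t) / t) * K + ((1 - cos(t)) / t^2) * (K @ K),

    which is exactly what the fac1 and fac2 terms below compute.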
51 | """ 52 | _, dim = log_rot.shape 53 | if dim != 3: 54 | raise ValueError("Input tensor shape has to be Nx3.") 55 | 56 | nrms = (log_rot * log_rot).sum(1) 57 | # phis ... rotation angles 58 | rot_angles = torch.clamp(nrms, eps).sqrt() 59 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 60 | rot_angles_inv = 1.0 / rot_angles 61 | fac1 = rot_angles_inv * rot_angles.sin() 62 | fac2 = rot_angles_inv * rot_angles_inv * (1.0 - rot_angles.cos()) 63 | skews = hat(log_rot) 64 | skews_square = torch.bmm(skews, skews) 65 | 66 | R = ( 67 | fac1[:, None, None] * skews 68 | # pyre-fixme[16]: `float` has no attribute `__getitem__`. 69 | + fac2[:, None, None] * skews_square 70 | + torch.eye(3, dtype=log_rot.dtype, device=log_rot.device)[None] 71 | ) 72 | 73 | return R 74 | 75 | 76 | def test_hat_map(): 77 | # Define a test input tensor 78 | v = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32) 79 | # Compute the skew-symmetric matrices using the hat_map function 80 | V = hat_map(v) 81 | # Verify that the output has the correct shape 82 | assert V.shape == (3, 3, 3) 83 | # Verify that the output is correct 84 | expected_V = torch.tensor( 85 | [ 86 | [[0, -3, 2], [3, 0, -1], [-2, 1, 0]], 87 | [[0, -6, 5], [6, 0, -4], [-5, 4, 0]], 88 | [[0, -9, 8], [9, 0, -7], [-8, 7, 0]], 89 | ], 90 | dtype=torch.float32, 91 | ) 92 | if not torch.allclose(V, expected_V): 93 | print("Computed output:") 94 | print(V) 95 | print("Expected output:") 96 | print(expected_V) 97 | assert torch.allclose(V, expected_V) 98 | 99 | 100 | def test_so3_to_exp_map(): 101 | so3 = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) 102 | exp_map = so3_exp_map(so3) 103 | computed_exp_map = so3_to_exp_map(so3) 104 | if not torch.allclose(computed_exp_map, exp_map): 105 | print("Computed output:") 106 | print(computed_exp_map) 107 | print("Expected output:") 108 | print(exp_map) 109 | 110 | 111 | test_so3_to_exp_map() 112 | test_hat_map() 113 | -------------------------------------------------------------------------------- /lab4d/tests/test_gpu_map.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 
2 | import numpy as np 3 | import torch 4 | 5 | from lab4d.tests.utils import check_func 6 | from lab4d.utils.gpu_utils import gpu_map 7 | 8 | 9 | def func(arg1, arg2): 10 | x = torch.ones(arg1, arg2, dtype=torch.int64, device="cuda") 11 | return int(torch.sum(x)) 12 | 13 | 14 | def test_gpu_map_static(n_elts): 15 | """Test utils/proc_utils.py::gpu_map_static""" 16 | 17 | def impl1(n_elts): 18 | return [(i + 1) * (i + 2) for i in range(n_elts)] 19 | 20 | def impl2(n_elts): 21 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="static") 22 | 23 | check_func(impl1, impl2, (n_elts,), name="gpu_map_static", niters=1) 24 | 25 | 26 | def test_gpu_map_dynamic(n_elts): 27 | """Test utils/proc_utils.py::gpu_map_dynamic""" 28 | 29 | def impl1(n_elts): 30 | return [(i + 1) * (i + 2) for i in range(n_elts)] 31 | 32 | def impl2(n_elts): 33 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="dynamic") 34 | 35 | check_func(impl1, impl2, (n_elts,), name="gpu_map_dynamic", niters=1) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_gpu_map_static(11) 40 | # test_gpu_map_dynamic(11) 41 | -------------------------------------------------------------------------------- /lab4d/tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 2 | import time 3 | from statistics import mean, stdev 4 | 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def check_func(func1, func2, args=(), name="", niters=100, rtol=None, atol=None): 10 | """Verify that both input functions produce identical outputs 11 | 12 | Args: 13 | func1: First function to test 14 | func2: Second function to test 15 | args: Arguments to both functions 16 | name: Name of this test 17 | niters: Number of test iterations (default 5) 18 | rtol: Relative tolerance (by default, selected based on datatype) 19 | atol: Absolute tolerance (by default, selected based on datatype) 20 | """ 21 | # Make sure cuda is already loaded 22 | torch.zeros(1, dtype=torch.float32, device="cuda") 23 | 24 | all_t1 = [] 25 | all_t2 = [] 26 | for i in range(niters): 27 | torch.cuda.synchronize() 28 | t1 = time.time() 29 | out1 = func1(*args) 30 | torch.cuda.synchronize() 31 | all_t1.append(time.time() - t1) 32 | 33 | torch.cuda.synchronize() 34 | t2 = time.time() 35 | out2 = func2(*args) 36 | torch.cuda.synchronize() 37 | all_t2.append(time.time() - t2) 38 | 39 | try: 40 | assert type(out1) == type(out2) 41 | if isinstance(out1, torch.Tensor) and isinstance(out2, torch.Tensor): 42 | torch.testing.assert_close(out1, out2, rtol=rtol, atol=atol) 43 | elif isinstance(out1, np.ndarray) and isinstance(out2, np.ndarray): 44 | np.testing.assert_allclose(out1, out2, rtol=rtol, atol=atol) 45 | else: 46 | assert all( 47 | elt1 == elt2 for elt1, elt2 in zip(out1, out2) 48 | ), f"out1={out1} but out2={out2}" 49 | except Exception as e: 50 | print(f"Error: {e}") 51 | 52 | all_t1 = all_t1[10:] # Remove the first few iterations to account for warmup 53 | all_t2 = all_t2[10:] 54 | avg_t1 = 1000 * mean(all_t1) # milliseconds 55 | avg_t2 = 1000 * mean(all_t2) 56 | std_t1 = 1000 * stdev(all_t1) if len(all_t1) > 1 else 0 57 | std_t2 = 1000 * stdev(all_t2) if len(all_t1) > 1 else 0 58 | 59 | print( 60 | f"Test '{name}' passed:\tavg_t1={avg_t1:.2f}ms,\tavg_t2={avg_t2:.2f}ms," 61 | f"\tstd_t1={std_t1:.2f}ms,\tstd_t2={std_t2:.2f}ms" 62 | ) 63 | -------------------------------------------------------------------------------- 
/lab4d/third_party/quaternion/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | quaternion.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/README.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | The code is released as [dqtorch](https://github.com/MightyChaos/dqtorch). 3 | Please refer to the repository for tutorials and general use cases. 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/__init__.py: -------------------------------------------------------------------------------- 1 | ## Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | from .mat3x3 import mat3x3_inv 3 | from .quaternion import quaternion_conjugate, quaternion_mul 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/add_gcc_cuda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | module add gcc-6.3.0 3 | module add cuda-11.1.1 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import os 3 | 4 | from torch.utils.cpp_extension import load 5 | 6 | _src_path = os.path.dirname(os.path.abspath(__file__)) 7 | 8 | nvcc_flags = [ 9 | '-O3', '-std=c++14', 10 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 11 | ] 12 | 13 | if os.name == "posix": 14 | c_flags = ['-O3', '-std=c++14'] 15 | elif os.name == "nt": 16 | c_flags = ['/O2', '/std:c++17'] 17 | 18 | # find cl.exe 19 | def find_cl_path(): 20 | import glob 21 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 22 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 23 | if paths: 24 | return paths[0] 25 | 26 | # If cl.exe is not on path, try to find it. 27 | if os.system("where cl.exe >nul 2>nul") != 0: 28 | cl_path = find_cl_path() 29 | if cl_path is None: 30 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 31 | os.environ["PATH"] += ";" + cl_path 32 | 33 | _backend = load(name='_quaternion', 34 | extra_cflags=c_flags, 35 | extra_cuda_cflags=nvcc_flags, 36 | sources=[os.path.join(_src_path, 'src', f) for f in [ 37 | 'quaternion.cu', 38 | 'matinv.cu', 39 | 'bindings.cpp', 40 | ]], 41 | ) 42 | 43 | __all__ = ['_backend'] -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/mat3x3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 
2 | import torch 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.cuda.amp import custom_bwd, custom_fwd 6 | 7 | try: 8 | import _quaternion as _backend 9 | except ImportError: 10 | from .backend import _backend 11 | 12 | 13 | class _Mat3x3_det(Function): 14 | @staticmethod 15 | @custom_fwd(cast_inputs=torch.float) 16 | def forward(ctx, inputs:torch.Tensor): 17 | B = inputs.shape[0] 18 | assert(inputs.shape[1] == 9) 19 | dtype = inputs.dtype 20 | device = inputs.device 21 | 22 | outputs = torch.empty(B, dtype=dtype, device=device) 23 | 24 | _backend.mat3x3_det_forward(inputs, outputs, B) 25 | ctx.save_for_backward(inputs) 26 | 27 | return outputs 28 | 29 | @staticmethod 30 | @once_differentiable 31 | @custom_bwd 32 | def backward(ctx, grad): 33 | return None 34 | 35 | _mat3x3_det = _Mat3x3_det.apply 36 | def mat3x3_det(inputs:torch.Tensor): 37 | rt_size = inputs.shape[:-2] 38 | outputs = _mat3x3_det(inputs.contiguous().view(-1,9)) 39 | return outputs.view(rt_size) 40 | 41 | 42 | class _Mat3x3_scale_adjoint(Function): 43 | @staticmethod 44 | @custom_fwd(cast_inputs=torch.half) 45 | def forward(ctx, inputs:torch.Tensor, scales:torch.Tensor): 46 | B = inputs.shape[0] 47 | assert(inputs.shape[1] == 9) 48 | dtype = inputs.dtype 49 | device = inputs.device 50 | outputs = torch.empty(B, 9, dtype=dtype, device=device) 51 | _backend.mat3x3_scale_adjoint_forward(inputs, scales, outputs, B) 52 | ctx.save_for_backward(inputs, scales) 53 | return outputs 54 | 55 | @staticmethod 56 | @once_differentiable 57 | @custom_bwd 58 | def backward(ctx, *grad_outputs): 59 | return None 60 | 61 | _mat3x3_scale_adjoint = _Mat3x3_scale_adjoint.apply 62 | def mat3x3_scale_adjoint(inputs:torch.Tensor, scales:torch.Tensor): 63 | rt_size = inputs.shape 64 | outputs = _mat3x3_scale_adjoint(inputs.contiguous().view(-1,9), scales.contiguous().view(-1)) 65 | return outputs.view(rt_size) 66 | 67 | 68 | class _Mat3x3_inv(Function): 69 | @staticmethod 70 | @custom_fwd(cast_inputs=torch.float) 71 | def forward(ctx, inputs:torch.Tensor): 72 | B = inputs.shape[0] 73 | assert(inputs.shape[1] == 9) 74 | dtype = inputs.dtype 75 | device = inputs.device 76 | scales = torch.empty(B, dtype=dtype, device=device) 77 | outputs = torch.empty(B, 9, dtype=dtype, device=device) 78 | _backend.mat3x3_inv_forward(inputs, outputs, scales, B) 79 | ctx.save_for_backward(outputs, scales) 80 | # print(scales) 81 | return outputs 82 | 83 | @staticmethod 84 | @once_differentiable 85 | @custom_bwd 86 | def backward(ctx, grad): 87 | inv_mats, _ = ctx.saved_tensors 88 | B = inv_mats.shape[0] 89 | assert(inv_mats.shape[1] == 9) 90 | dtype = inv_mats.dtype 91 | device = inv_mats.device 92 | grad_inputs = torch.empty(B, 9, dtype=dtype, device=device) 93 | _backend.mat3x3_inv_backward(grad, inv_mats, grad_inputs, B) 94 | return grad_inputs 95 | 96 | 97 | 98 | _mat3x3_inv = _Mat3x3_inv.apply 99 | def mat3x3_inv(inputs:torch.Tensor): 100 | rt_size = inputs.shape 101 | outputs = _mat3x3_inv(inputs.contiguous().view(-1,9)) 102 | return outputs.view(rt_size) 103 | 104 | def _test_mat3x3_inv_backward(x:torch.Tensor): 105 | x_inv = mat3x3_inv(x) 106 | loss = x_inv.mean() 107 | loss.backward() 108 | 109 | def _test(): 110 | import torch.utils.benchmark as benchmark 111 | N = 4096*128 112 | # N = 100 113 | x = torch.randn(N, 3, 3, requires_grad=True).float().cuda() 114 | x_det = mat3x3_det(x) 115 | 116 | # torch.autograd.gradcheck(mat3x3_inv, x) 117 | 118 | T = 100 119 | t = benchmark.Timer( 120 | 
stmt='mat3x3_det(x)', 121 | setup='from __main__ import mat3x3_det', 122 | globals={'x': x}) 123 | print(t.timeit(T)) 124 | 125 | x_adj = mat3x3_scale_adjoint(x, x_det) 126 | T = 100 127 | t = benchmark.Timer( 128 | stmt='mat3x3_scale_adjoint(x, x_det)', 129 | setup='from __main__ import mat3x3_scale_adjoint', 130 | globals={'x': x, 'x_det':x_det}) 131 | print(t.timeit(T)) 132 | 133 | # check correctness 134 | print(x @ x_adj) 135 | 136 | x_inv = mat3x3_inv(x) 137 | print(x @ x_inv) 138 | T = 100 139 | t = benchmark.Timer( 140 | stmt='mat3x3_inv(x)', 141 | setup='from __main__ import mat3x3_inv', 142 | globals={'x': x}) 143 | print(t.timeit(T)) 144 | 145 | T = 100 146 | t = benchmark.Timer( 147 | stmt='_test_mat3x3_inv_backward(x)', 148 | setup='from __main__ import _test_mat3x3_inv_backward', 149 | globals={'x': x}) 150 | print(t.timeit(T)) 151 | 152 | 153 | if __name__ == '__main__': 154 | _test() 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/quaternion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import torch 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.cuda.amp import custom_bwd, custom_fwd 6 | 7 | try: 8 | import _quaternion as _backend 9 | except ImportError: 10 | from .backend import _backend 11 | 12 | class _Quaternion_mul_backward(Function): 13 | @staticmethod 14 | @custom_fwd(cast_inputs=torch.half) 15 | def forward(ctx, grad, inputs_1, inputs_2): 16 | B = inputs_1.shape[0] # batch size, coord dim 17 | D1 = inputs_1.shape[1] 18 | D2 = inputs_2.shape[1] 19 | dtype, device = inputs_1.dtype, inputs_1.device 20 | grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype) 21 | grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype) 22 | _backend.quaternion_mul_backward(grad, B, D1, D2, inputs_1, inputs_2, grad_inputs_1, grad_inputs_2) 23 | ctx.save_for_backward(grad, inputs_1, inputs_2) 24 | return grad_inputs_1, grad_inputs_2 25 | 26 | @staticmethod 27 | @once_differentiable 28 | @custom_bwd 29 | def backward(ctx, *grad_outputs): 30 | grad_out_1, grad_out_2 = grad_outputs 31 | grad, inputs_1, inputs_2 = ctx.saved_tensors 32 | B = inputs_1.shape[0] # batch size, coord dim 33 | D1 = inputs_1.shape[1] 34 | D2 = inputs_2.shape[1] 35 | dtype, device = inputs_1.dtype, inputs_1.device 36 | grad_grad = torch.empty(B, 4, device=device, dtype=dtype) 37 | grad_grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype) 38 | grad_grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype) 39 | _backend.quaternion_mul_backward_backward(grad_out_1, grad_out_2, 40 | B, D1, D2, 41 | grad, inputs_1, inputs_2, 42 | grad_grad, grad_grad_inputs_1, grad_grad_inputs_2) 43 | return grad_grad, grad_grad_inputs_1, grad_grad_inputs_2 44 | 45 | _quaternion_mul_backward = _Quaternion_mul_backward.apply 46 | 47 | class _Quaternion_mul(Function): 48 | @staticmethod 49 | @custom_fwd(cast_inputs=torch.half) 50 | def forward(ctx, inputs_1:torch.Tensor, inputs_2:torch.Tensor): 51 | # inputs: [B, input_dim], float in [-1, 1] 52 | # RETURN: [B, F], float 53 | calc_grad_inputs = inputs_1.requires_grad or inputs_2.requires_grad 54 | 55 | inputs_1 = inputs_1.contiguous() 56 | inputs_2 = inputs_2.contiguous() 57 | 58 | B = inputs_1.shape[0] # batch size, coord dim 59 | D1 = inputs_1.shape[1] 60 | D2 = inputs_2.shape[1] 61 | 62 | dtype = 
inputs_1.dtype 63 | device = inputs_1.device 64 | 65 | outputs = torch.empty(B, 4, dtype=dtype, device=device) 66 | 67 | 68 | _backend.quaternion_mul_forward(inputs_1, inputs_2, outputs, B, D1, D2) 69 | 70 | ctx.save_for_backward(inputs_1, inputs_2) 71 | 72 | 73 | return outputs 74 | 75 | @staticmethod 76 | @custom_bwd 77 | def backward(ctx, grad): 78 | # grad: [B, C * C] 79 | 80 | grad = grad.contiguous() 81 | inputs_1, inputs_2 = ctx.saved_tensors 82 | 83 | grad_inputs_1, grad_inputs_2 = _quaternion_mul_backward(grad, inputs_1, inputs_2) 84 | 85 | return grad_inputs_1, grad_inputs_2 86 | # else: 87 | # return None, None 88 | 89 | 90 | 91 | quaternion_mul = _Quaternion_mul.apply 92 | 93 | 94 | class _Quaternion_conjugate(torch.autograd.Function): 95 | @staticmethod 96 | @custom_fwd(cast_inputs=torch.half) 97 | def forward(ctx, inputs:torch.Tensor): 98 | B = inputs.shape[0] # batch size, coord dim 99 | outputs = torch.empty_like(inputs) 100 | _backend.quaternion_conjugate(inputs.contiguous(), B, outputs) 101 | return outputs 102 | 103 | @staticmethod 104 | @custom_bwd 105 | def backward(ctx, grad): 106 | return _Quaternion_conjugate.apply(grad) 107 | 108 | 109 | quaternion_conjugate = _Quaternion_conjugate.apply 110 | 111 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import os 3 | 4 | from setuptools import setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | _src_path = os.path.dirname(os.path.abspath(__file__)) 8 | 9 | nvcc_flags = [ 10 | '-O3', '-std=c++14', 11 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 12 | ] 13 | 14 | if os.name == "posix": 15 | c_flags = ['-O3', '-std=c++14'] 16 | elif os.name == "nt": 17 | c_flags = ['/O2', '/std:c++17'] 18 | 19 | # find cl.exe 20 | def find_cl_path(): 21 | import glob 22 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 23 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 24 | if paths: 25 | return paths[0] 26 | 27 | # If cl.exe is not on path, try to find it. 28 | if os.system("where cl.exe >nul 2>nul") != 0: 29 | cl_path = find_cl_path() 30 | if cl_path is None: 31 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 32 | os.environ["PATH"] += ";" + cl_path 33 | 34 | setup( 35 | name='quaternion', # package name, import this to use python API 36 | ext_modules=[ 37 | CUDAExtension( 38 | name='_quaternion', # extension name, import this to use CUDA API 39 | sources=[os.path.join(_src_path, 'src', f) for f in [ 40 | 'quaternion.cu', 41 | 'matinv.cu', 42 | 'bindings.cpp', 43 | ]], 44 | extra_compile_args={ 45 | 'cxx': c_flags, 46 | 'nvcc': nvcc_flags, 47 | } 48 | ), 49 | ], 50 | cmdclass={ 51 | 'build_ext': BuildExtension, 52 | } 53 | ) -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 
2 | #include 3 | 4 | #include "quaternion.h" 5 | #include "matinv.h" 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("quaternion_mul_forward", &quaternion_mul_forward, "quaternion multiplication forward (CUDA)"); 9 | m.def("quaternion_mul_backward", &quaternion_mul_backward, "quaternion multiplication backward (CUDA)"); 10 | m.def("quaternion_mul_backward_backward", &quaternion_mul_backward_backward, "quaternion multiplication backward (CUDA)"); 11 | m.def("quaternion_conjugate", &quaternion_conjugate, "quaternion_conjugate (CUDA)"); 12 | // mat3x3 inverse 13 | m.def("mat3x3_det_forward", &mat3x3_det_forward, "mat3x3_det_forward (CUDA)"); 14 | m.def("mat3x3_scale_adjoint_forward", &mat3x3_scale_adjoint_forward, "mat3x3_scale_adjoint_forward (CUDA)"); 15 | m.def("mat3x3_inv_forward", &mat3x3_inv_forward, "mat3x3_inv_forward (CUDA)"); 16 | m.def("mat3x3_inv_backward", &mat3x3_inv_backward, "mat3x3_inv_backward (CUDA)"); 17 | } -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/matinv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | # pragma once 3 | 4 | #include 5 | #include 6 | 7 | void mat3x3_det_forward(at::Tensor inputs, at::Tensor outputs,const uint32_t B); 8 | void mat3x3_scale_adjoint_forward(at::Tensor inputs, at::Tensor scales, at::Tensor outputs, const uint32_t B); 9 | void mat3x3_inv_forward(at::Tensor inputs, at::Tensor outputs, at::Tensor output_scales, const uint32_t B); 10 | 11 | void mat3x3_inv_backward(at::Tensor grad, at::Tensor inv_mats, at::Tensor grad_inputs, const uint32_t B); -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/quaternion.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | # pragma once 3 | 4 | #include 5 | #include 6 | 7 | // inputs: [B, D], float, in [-1, 1] 8 | // outputs: [B, F], float 9 | 10 | // encode_forward(inputs, outputs, B, input_dim, degree, calc_grad_inputs, dy_dx) 11 | void quaternion_mul_forward(at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor outputs, const uint32_t B, const uint32_t D1, const uint32_t D2); 12 | 13 | // sh_encode_backward(grad, inputs, B, input_dim, degree, ctx.calc_grad_inputs, dy_dx, grad_inputs) 14 | void quaternion_mul_backward(at::Tensor grad, const uint32_t B, const uint32_t D1, const uint32_t D2, at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor grad_inputs_1, at::Tensor grad_inputs_2); 15 | 16 | 17 | void quaternion_mul_backward_backward( 18 | at::Tensor grad_out_1, at::Tensor grad_out_2, 19 | const uint32_t B, const uint32_t D1, const uint32_t D2, 20 | at::Tensor grad, at::Tensor inputs_1, at::Tensor inputs_2, 21 | at::Tensor grad_grad, at::Tensor grad_grad_inputs_1, at::Tensor grad_grad_inputs_2); 22 | 23 | 24 | void quaternion_conjugate(at::Tensor inputs, const uint32_t B, at::Tensor outputs); -------------------------------------------------------------------------------- /lab4d/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
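A small usage sketch for the quaternion extension defined above may help; it is illustrative only. It assumes the CUDA extension has been built (for example via the setup.py shown earlier, or on the fly through backend.py), that a GPU is available, and that quaternions are stored as rows of 4 values.

import torch
import torch.nn.functional as F

from lab4d.third_party.quaternion.quaternion import (
    quaternion_conjugate,
    quaternion_mul,
)

# random unit quaternions, one per batch element
q1 = F.normalize(torch.randn(8, 4, device="cuda"), dim=-1).requires_grad_(True)
q2 = F.normalize(torch.randn(8, 4, device="cuda"), dim=-1)

q12 = quaternion_mul(q1, q2)         # (8, 4) composed rotations
q12_inv = quaternion_conjugate(q12)  # (8, 4) conjugate; the inverse for unit quaternions

# gradients flow through the custom autograd Functions; a second backward pass
# is also supported via _Quaternion_mul_backward above
q12.sum().backward()
print(q1.grad.shape)  # torch.Size([8, 4])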
2 | import os 3 | import sys 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | from absl import app 8 | 9 | cwd = os.getcwd() 10 | if cwd not in sys.path: 11 | sys.path.insert(0, cwd) 12 | 13 | from lab4d.config import get_config, save_config 14 | from lab4d.engine.train_utils import get_local_rank 15 | from lab4d.utils.profile_utils import record_function 16 | 17 | cudnn.benchmark = True 18 | 19 | 20 | def train_ddp(Trainer): 21 | local_rank = get_local_rank() 22 | torch.cuda.set_device(local_rank) 23 | 24 | opts = get_config() 25 | if local_rank == 0: 26 | save_config() 27 | 28 | torch.distributed.init_process_group( 29 | "nccl", 30 | init_method="env://", 31 | world_size=opts["ngpu"], 32 | rank=local_rank, 33 | ) 34 | 35 | # torch.manual_seed(0) 36 | # torch.cuda.manual_seed(1) 37 | # torch.manual_seed(0) 38 | 39 | trainer = Trainer(opts) 40 | trainer.train() 41 | 42 | 43 | def main(_): 44 | from lab4d.engine.trainer import Trainer 45 | 46 | train_ddp(Trainer) 47 | 48 | 49 | if __name__ == "__main__": 50 | app.run(main) 51 | -------------------------------------------------------------------------------- /lab4d/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/utils/__init__.py -------------------------------------------------------------------------------- /lab4d/utils/cam_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # TODO: move camera-related utils to here 3 | -------------------------------------------------------------------------------- /lab4d/utils/decorator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | from functools import wraps 3 | 4 | 5 | def train_only_fields(method): 6 | """Decorator to skip the method and return an empty field list if not in 7 | training mode. 8 | """ 9 | 10 | @wraps(method) 11 | def _impl(self, *method_args, **method_kwargs): 12 | if self.training: 13 | return method(self, *method_args, **method_kwargs) 14 | else: 15 | return {} 16 | 17 | return _impl 18 | -------------------------------------------------------------------------------- /lab4d/utils/gpu_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 2 | import multiprocessing 3 | import os 4 | 5 | 6 | def gpu_map(func, args, gpus=None, method="static"): 7 | """Map a function over GPUs 8 | 9 | Args: 10 | func (Function): Function to parallelize 11 | args (List(Tuple)): List of argument tuples, to split evenly over GPUs 12 | gpus (List(int) or None): Optional list of GPU device IDs to use 13 | method (str): Either "static" or "dynamic" (default "static"). 14 | Static assignment is the fastest if workload per task is balanced; 15 | dynamic assignment better handles tasks with uneven workload. 
16 | Returns: 17 | outs (List): List of outputs 18 | """ 19 | mp = multiprocessing.get_context("spawn") # spawn allows CUDA usage 20 | devices = os.getenv("CUDA_VISIBLE_DEVICES") 21 | outputs = None 22 | 23 | # Compute list of GPUs 24 | if gpus is None: 25 | if devices is None: 26 | num_gpus = int(os.popen("nvidia-smi -L | wc -l").read()) 27 | gpus = list(range(num_gpus)) 28 | else: 29 | gpus = [int(n) for n in devices.split(",")] 30 | 31 | # Map arguments over GPUs using static or dynamic assignment 32 | try: 33 | if method == "static": 34 | # Interleave arguments across GPUs 35 | args_by_rank = [[] for rank in range(len(gpus))] 36 | for it, arg in enumerate(args): 37 | args_by_rank[it % len(gpus)].append(arg) 38 | 39 | # Spawn processes 40 | spawned_procs = [] 41 | result_queue = mp.Queue() 42 | for rank, gpu_id in enumerate(gpus): 43 | # Environment variables get copied on process creation 44 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) 45 | proc_args = (func, args_by_rank[rank], rank, result_queue) 46 | proc = mp.Process(target=gpu_map_static_helper, args=proc_args) 47 | proc.start() 48 | spawned_procs.append(proc) 49 | 50 | # Wait to finish 51 | for proc in spawned_procs: 52 | proc.join() 53 | 54 | # Construct output list 55 | outputs_by_rank = {} 56 | while True: 57 | try: 58 | rank, out = result_queue.get(block=False) 59 | outputs_by_rank[rank] = out 60 | except multiprocessing.queues.Empty: 61 | break 62 | 63 | outputs = [] 64 | for it in range(len(args)): 65 | rank = it % len(gpus) 66 | idx = it // len(gpus) 67 | outputs.append(outputs_by_rank[rank][idx]) 68 | 69 | elif method == "dynamic": 70 | gpu_queue = mp.Queue() 71 | for gpu_id in gpus: 72 | gpu_queue.put(gpu_id) 73 | 74 | # Spawn processes as GPUs become available 75 | spawned_procs = [] 76 | result_queue = mp.Queue() 77 | for it, arg in enumerate(args): 78 | # Take latest available gpu_id (blocking) 79 | gpu_id = gpu_queue.get() 80 | 81 | # Environment variables get copied on process creation 82 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) 83 | proc_args = (func, arg, it, gpu_id, result_queue, gpu_queue) 84 | proc = mp.Process(target=gpu_map_dynamic_helper, args=proc_args) 85 | proc.start() 86 | spawned_procs.append(proc) 87 | 88 | # Wait to finish 89 | for proc in spawned_procs: 90 | proc.join() 91 | 92 | # Construct output list 93 | outputs_by_it = {} 94 | while True: 95 | try: 96 | it, out = result_queue.get(block=False) 97 | outputs_by_it[it] = out 98 | except multiprocessing.queues.Empty: 99 | break 100 | 101 | outputs = [] 102 | for it in range(len(args)): 103 | outputs.append(outputs_by_it[it]) 104 | 105 | else: 106 | raise NotImplementedError 107 | 108 | except Exception as e: 109 | pass 110 | 111 | # Restore env vars 112 | finally: 113 | if devices is not None: 114 | os.environ["CUDA_VISIBLE_DEVICES"] = devices 115 | else: 116 | del os.environ["CUDA_VISIBLE_DEVICES"] 117 | return outputs 118 | 119 | 120 | def gpu_map_static_helper(func, args, rank, result_queue): 121 | out = [func(*arg) for arg in args] 122 | result_queue.put((rank, out)) 123 | 124 | 125 | def gpu_map_dynamic_helper(func, arg, it, gpu_id, result_queue, gpu_queue): 126 | out = func(*arg) 127 | gpu_queue.put(gpu_id) 128 | result_queue.put((it, out)) 129 | -------------------------------------------------------------------------------- /lab4d/utils/io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
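A minimal usage sketch for gpu_map follows (not part of the original file). It assumes at least one visible CUDA device and must run under an `if __name__ == "__main__":` guard because workers are spawned as separate processes; the worker function and its arguments are hypothetical.

import torch

from lab4d.utils.gpu_utils import gpu_map


def scale_on_gpu(value, scale):
    # hypothetical per-task workload; CUDA_VISIBLE_DEVICES is set per worker by gpu_map
    x = torch.tensor(value, device="cuda") * scale
    return x.item()


if __name__ == "__main__":
    tasks = [(i, 2.0) for i in range(8)]  # list of argument tuples, one per task
    # "static" interleaves tasks over GPUs; "dynamic" hands each task to the next free GPU
    results = gpu_map(scale_on_gpu, tasks, method="static")
    print(results)  # eight results, returned in the same order as `tasks`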
2 | import glob 3 | import os 4 | 5 | import cv2 6 | import einops 7 | import imageio 8 | import numpy as np 9 | 10 | from lab4d.utils.vis_utils import img2color, make_image_grid 11 | 12 | 13 | def make_save_dir(opts, sub_dir="renderings"): 14 | """Create a subdirectory to save outputs 15 | 16 | Args: 17 | opts (Dict): Command-line options 18 | sub_dir (str): Subdirectory to create 19 | Returns: 20 | save_dir (str): Output directory 21 | """ 22 | logname = "%s-%s" % (opts["seqname"], opts["logname"]) 23 | save_dir = "%s/%s/%s/" % (opts["logroot"], logname, sub_dir) 24 | os.makedirs(save_dir, exist_ok=True) 25 | return save_dir 26 | 27 | 28 | def save_vid( 29 | outpath, 30 | frames, 31 | suffix=".mp4", 32 | upsample_frame=0, 33 | fps=10, 34 | target_size=None, 35 | ): 36 | """Save frames to video 37 | 38 | Args: 39 | outpath (str): Output directory 40 | frames: (N, H, W, x) Frames to output 41 | suffix (str): File type to save (".mp4" or ".gif") 42 | upsample_frame (int): Target number of frames 43 | fps (int): Target frames per second 44 | target_size: If provided, (H, W) target size of frames 45 | """ 46 | # convert to 150 frames 47 | if upsample_frame < 1: 48 | upsample_frame = len(frames) 49 | frame_150 = [] 50 | for i in range(int(upsample_frame)): 51 | fid = int(i / upsample_frame * len(frames)) 52 | frame = frames[fid] 53 | if frame.max() <= 1: 54 | frame = frame * 255 55 | frame = frame.astype(np.uint8) 56 | if target_size is not None: 57 | frame = cv2.resize(frame, target_size[::-1]) 58 | if suffix == ".gif": 59 | h, w = frame.shape[:2] 60 | fxy = np.sqrt(4e4 / (h * w)) 61 | frame = cv2.resize(frame, None, fx=fxy, fy=fxy) 62 | 63 | # resize to make divisible by marco block size = 16 64 | h, w = frame.shape[:2] 65 | h = int(np.ceil(h / 16) * 16) 66 | w = int(np.ceil(w / 16) * 16) 67 | frame = cv2.resize(frame, (w, h)) 68 | 69 | frame_150.append(frame) 70 | imageio.mimsave("%s%s" % (outpath, suffix), frame_150, fps=fps) 71 | 72 | 73 | def save_rendered(rendered, save_dir, raw_size, pca_fn): 74 | """Save rendered outputs 75 | 76 | Args: 77 | rendered (Dict): Maps arbitrary keys to outputs of shape (N, H, W, x) 78 | save_dir (str): Output directory 79 | raw_size: (2,) Target height and width 80 | pca_fn (Function): Function to apply PCA on feature outputs 81 | """ 82 | # save rendered images 83 | for k, v in rendered.items(): 84 | n, h, w = v.shape[:3] 85 | img_grid = make_image_grid(v) 86 | img_grid = img2color(k, img_grid, pca_fn=pca_fn) 87 | img_grid = (img_grid * 255).astype(np.uint8) 88 | # cv2.imwrite("%s/%s.jpg" % (save_dir, k), img_grid[:, :, ::-1]) 89 | 90 | # save video 91 | frames = einops.rearrange(img_grid, "(m h) (n w) c -> (m n) h w c", h=h, w=w) 92 | frames = frames[:n] 93 | save_vid( 94 | "%s/%s" % (save_dir, k), 95 | frames, 96 | fps=30, 97 | target_size=(raw_size[0], raw_size[1]), 98 | ) 99 | -------------------------------------------------------------------------------- /lab4d/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | 7 | def entropy_loss(prob, dim=-1): 8 | """Compute entropy of a probability distribution 9 | In the case of skinning weights, each column is a distribution over assignment to B bones. 10 | We want to encourage low entropy, i.e. each point is assigned to fewer bones. 
11 | 12 | Args: 13 | prob: (..., B) Probability distribution 14 | Returns: 15 | entropy (...,) Entropy of each distribution 16 | """ 17 | entropy = -(prob * (prob + 1e-9).log()).sum(dim) 18 | return entropy 19 | 20 | 21 | def cross_entropy_skin_loss(skin): 22 | """Compute entropy of a probability distribution 23 | In the case of skinning weights, each column is a distribution over assignment to B bones. 24 | We want to encourage low entropy, i.e. each point is assigned to fewer bones. 25 | 26 | Args: 27 | skin: (..., B) un-normalized skinning weights 28 | """ 29 | shape = skin.shape 30 | nbones = shape[-1] 31 | full_skin = skin.clone() 32 | 33 | # find the most likely bone assignment 34 | score, indices = skin.max(-1, keepdim=True) 35 | skin = torch.zeros_like(skin).fill_(0) 36 | skin = skin.scatter(-1, indices, torch.ones_like(score)) 37 | 38 | cross_entropy = F.cross_entropy( 39 | full_skin.view(-1, nbones), skin.view(-1, nbones), reduction="none" 40 | ) 41 | cross_entropy = cross_entropy.view(shape[:-1]) 42 | return cross_entropy 43 | 44 | 45 | def align_vectors(v1, v2): 46 | """Return the scale that best aligns v1 to v2 in the L2 sense: 47 | min || kv1-v2 ||^2 48 | 49 | Args: 50 | v1: (...,) Source vector 51 | v2: (...,) Target vector 52 | Returns: 53 | scale_fac (1,): Scale factor 54 | """ 55 | scale_fac = (v1 * v2).sum() / (v1 * v1).sum() 56 | if scale_fac < 0: 57 | scale_fac = torch.tensor([1.0], device=scale_fac.device) 58 | return scale_fac 59 | -------------------------------------------------------------------------------- /lab4d/utils/numpy_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | 4 | 5 | def interp_wt(x, y, x2, type="linear"): 6 | """Map a scalar value from range [x0, x1] to [y0, y1] using interpolation 7 | 8 | Args: 9 | x: Input range [x0, x1] 10 | y: Output range [y0, y1] 11 | x2 (float): Scalar value in range [x0, x1] 12 | type (str): Interpolation type ("linear" or "log") 13 | Returns: 14 | y2 (float): Scalar value mapped to [y0, y1] 15 | """ 16 | # Extract values from tuples 17 | x0, x1 = x 18 | y0, y1 = y 19 | 20 | # # Check if x2 is in range 21 | # if x2 < x0 or x2 > x1: 22 | # raise ValueError("x2 must be in the range [x0, x1]") 23 | 24 | if type == "linear": 25 | # Perform linear interpolation 26 | y2 = y0 + (x2 - x0) * (y1 - y0) / (x1 - x0) 27 | 28 | elif type == "log": 29 | # Transform to log space 30 | log_y0 = np.log10(y0) 31 | log_y1 = np.log10(y1) 32 | 33 | # Perform linear interpolation in log space 34 | log_y2 = log_y0 + (x2 - x0) * (log_y1 - log_y0) / (x1 - x0) 35 | 36 | # Transform back to original space 37 | y2 = 10**log_y2 38 | 39 | else: 40 | raise ValueError("interpolation_type must be 'linear' or 'log'") 41 | 42 | y2 = np.clip(y2, np.min(y), np.max(y)) 43 | return y2 44 | 45 | 46 | def pca_numpy(raw_data, n_components): 47 | """Return a function that applies PCA to input data, based on the principal 48 | components of a raw data distribution. 49 | 50 | Args: 51 | raw_data (np.array): Raw data distribution, used to compute 52 | principal components. 
53 | n_components (int): Number of principal components to use 54 | Returns: 55 | apply_pca_fn (Function): A function that applies PCA to input data 56 | """ 57 | # center the data matrix by subtracting the mean of each feature 58 | mean = np.mean(raw_data, axis=0) 59 | centered_data = raw_data - mean 60 | 61 | # compute the covariance matrix of the centered data 62 | covariance_matrix = np.cov(centered_data.T) 63 | 64 | # compute the eigenvalues and eigenvectors of the covariance matrix 65 | eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix) 66 | 67 | # sort the eigenvalues in descending order and sort the eigenvectors accordingly 68 | sorted_indices = np.argsort(eigenvalues)[::-1] 69 | sorted_eigenvectors = eigenvectors[:, sorted_indices] 70 | 71 | # choose the top k eigenvectors (or all eigenvectors if k is not specified) 72 | top_eigenvectors = sorted_eigenvectors[:, :n_components] 73 | 74 | def apply_pca_fn(data, normalize=False): 75 | """ 76 | Args: 77 | data (np.array): Data to apply PCA to 78 | normalize (bool): If True, normalize the data to 0,1 for visualization 79 | """ 80 | shape = data.shape 81 | data = data.reshape(-1, shape[-1]) 82 | data = np.dot(data - mean, top_eigenvectors) 83 | 84 | if normalize: 85 | # scale to std = 1 86 | data = data / np.sqrt(eigenvalues[sorted_indices][:n_components]) 87 | data = np.clip(data, -2, 2) # clip to [-2, 2], 95.4% percentile 88 | # scale to 0,1 89 | data = (data + 2) / 4 90 | 91 | data = data.reshape(shape[:-1] + (n_components,)) 92 | return data 93 | 94 | return apply_pca_fn 95 | 96 | 97 | def bilinear_interp(feat, xy_loc): 98 | """Sample from a 2D feature map using bilinear interpolation 99 | 100 | Args: 101 | feat: (H,W,x) Input feature map 102 | xy_loc: (N,2) Coordinates to sample, float 103 | Returns: 104 | feat_samp: (N,x) Sampled features 105 | """ 106 | dtype = feat.dtype 107 | ul_loc = np.floor(xy_loc).astype(int) # x,y 108 | x = (xy_loc[:, 0] - ul_loc[:, 0])[:, None] # (N, 1) 109 | y = (xy_loc[:, 1] - ul_loc[:, 1])[:, None] # (N, 1) 110 | ul_loc = np.clip(ul_loc, 0, 110) # clip 111 | q11 = feat[ul_loc[:, 1], ul_loc[:, 0]] # (N, 16) 112 | q12 = feat[ul_loc[:, 1], ul_loc[:, 0] + 1] 113 | q21 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0]] 114 | q22 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0] + 1] 115 | feat_samp = ( 116 | q11 * (1 - x) * (1 - y) 117 | + q21 * (1 - x) * (y - 0) 118 | + q12 * (x - 0) * (1 - y) 119 | + q22 * (x - 0) * (y - 0) 120 | ) 121 | feat_samp = feat_samp.astype(dtype) 122 | return feat_samp 123 | -------------------------------------------------------------------------------- /lab4d/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
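The function-returning pattern of pca_numpy above is easiest to see with a short sketch; the feature dimensions and random arrays below are placeholders, not values from the codebase.

import numpy as np

from lab4d.utils.numpy_utils import pca_numpy

# fit the projection on a reference feature distribution (e.g. flattened image features)
raw_feats = np.random.randn(10000, 16).astype(np.float32)
apply_pca = pca_numpy(raw_feats, n_components=3)

# project a (H, W, 16) feature map down to 3 channels for visualization
feat_map = np.random.randn(64, 64, 16).astype(np.float32)
feat_rgb = apply_pca(feat_map, normalize=True)  # (64, 64, 3), values in [0, 1]
print(feat_rgb.shape)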
2 | import torch 3 | 4 | @torch.enable_grad() 5 | def compute_gradient(fn, x): 6 | """ 7 | gradient of mlp params wrt pts 8 | """ 9 | x.requires_grad_(True) 10 | y = fn(x) 11 | 12 | # get gradient for each size-1 output 13 | gradients = [] 14 | for i in range(y.shape[-1]): 15 | y_sub = y[..., i : i + 1] 16 | d_output = torch.ones_like(y_sub, requires_grad=False, device=y.device) 17 | gradient = torch.autograd.grad( 18 | outputs=y_sub, 19 | inputs=x, 20 | grad_outputs=d_output, 21 | create_graph=True, 22 | retain_graph=True, 23 | only_inputs=True, 24 | )[0] 25 | gradients.append(gradient[..., None]) 26 | gradients = torch.cat(gradients, -1) # ...,input-dim, output-dim 27 | return gradients 28 | 29 | def frameid_to_vid(fid, frame_offset): 30 | """Given absolute frame ids [0, ..., N], compute the video id of each frame. 31 | 32 | Args: 33 | fid: (nframes,) Absolute frame ids 34 | e.g. [0, 1, 2, 3, 100, 101, 102, 103, 200, 201, 202, 203] 35 | frame_offset: (nvideos + 1,) Offset of each video 36 | e.g., [0, 100, 200, 300] 37 | Returns: 38 | vid: (nframes,) Maps idx to video id 39 | tid: (nframes,) Maps idx to relative frame id 40 | """ 41 | vid = torch.zeros_like(fid) 42 | for i in range(frame_offset.shape[0] - 1): 43 | assign = torch.logical_and(fid >= frame_offset[i], fid < frame_offset[i + 1]) 44 | vid[assign] = i 45 | return vid 46 | 47 | 48 | def remove_ddp_prefix(state_dict): 49 | """Remove distributed data parallel prefix from model checkpoint 50 | 51 | Args: 52 | state_dict (Dict): Model checkpoint 53 | Returns: 54 | new_state_dict (Dict): New model checkpoint 55 | """ 56 | new_state_dict = {} 57 | for key, value in state_dict.items(): 58 | if key.startswith("module."): 59 | new_key = key[7:] # Remove 'module.' prefix 60 | else: 61 | new_key = key 62 | new_state_dict[new_key] = value 63 | return new_state_dict 64 | 65 | 66 | def remove_state_startwith(state_dict, prefix): 67 | """Remove model parameters that start with a prefix 68 | 69 | Args: 70 | state_dict (Dict): Model checkpoint 71 | prefix (str): Prefix to filter 72 | Returns: 73 | new_state_dict (Dict): New model checkpoint 74 | """ 75 | new_state_dict = {} 76 | for key, value in state_dict.items(): 77 | if key.startswith(prefix): 78 | continue 79 | else: 80 | new_state_dict[key] = value 81 | return new_state_dict 82 | 83 | 84 | def remove_state_with(state_dict, string): 85 | """Remove model parameters that contain a string 86 | 87 | Args: 88 | state_dict (Dict): Model checkpoint 89 | string (str): String to filter 90 | Returns: 91 | new_state_dict (Dict): New model checkpoint 92 | """ 93 | new_state_dict = {} 94 | for key, value in state_dict.items(): 95 | if string in key: 96 | continue 97 | else: 98 | new_state_dict[key] = value 99 | return new_state_dict 100 | 101 | 102 | def compress_state_with(state_dict, string): 103 | """Initialize model parameters with the mean of the instance embedding if 104 | the parameter name contains a string 105 | 106 | Args: 107 | state_dict (Dict): Model checkpoint, modified in place 108 | string (str): String to filter 109 | """ 110 | # init with the mean of inst_embedding 111 | for key, value in state_dict.items(): 112 | if string in key: 113 | state_dict[key] = value.mean(dim=0, keepdim=True) 114 | -------------------------------------------------------------------------------- /lab4d/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
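The example values in the frameid_to_vid docstring above can be run directly; note that, as written, the function returns only the video id (the relative frame id mentioned in the docstring is not returned). A quick check:

import torch

from lab4d.utils.torch_utils import frameid_to_vid

# three videos of 100 frames each
fid = torch.tensor([0, 1, 2, 3, 100, 101, 102, 103, 200, 201, 202, 203])
frame_offset = torch.tensor([0, 100, 200, 300])

vid = frameid_to_vid(fid, frame_offset)
print(vid.tolist())  # [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]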
2 | from lab4d.utils.quat_transform import ( 3 | dual_quaternion_apply, 4 | dual_quaternion_inverse, 5 | dual_quaternion_to_quaternion_translation, 6 | ) 7 | 8 | 9 | def get_bone_coords(xyz, bone2obj): 10 | """Transform points from object canonical space to bone coordinates 11 | 12 | Args: 13 | xyz: (..., 3) Points in object canonical space 14 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3) 15 | transforms, written as dual quaternions 16 | Returns: 17 | xyz_bone: (..., B, 3) Points in bone space 18 | """ 19 | # transform xyz to bone space 20 | obj2bone = dual_quaternion_inverse(bone2obj) 21 | 22 | # reshape 23 | xyz = xyz[..., None, :].expand(xyz.shape[:-1] + (bone2obj[0].shape[-2], 3)).clone() 24 | xyz_bone = dual_quaternion_apply(obj2bone, xyz) 25 | return xyz_bone 26 | 27 | 28 | def get_xyz_bone_distance(xyz, bone2obj): 29 | """Compute squared distances from points to bone centers 30 | 31 | Argss: 32 | xyz: (..., 3) Points in object canonical space 33 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3) transforms, written as dual quaternions 34 | 35 | Returns: 36 | dist2: (..., B) Squared distance to each bone center 37 | """ 38 | _, center = dual_quaternion_to_quaternion_translation(bone2obj) 39 | dist2 = (xyz[..., None, :] - center).pow(2).sum(-1) # M, K 40 | return dist2 41 | -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/logo.png -------------------------------------------------------------------------------- /media/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/teaser.gif -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/__init__.py -------------------------------------------------------------------------------- /preprocess/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # import lab4d 2 | import os 3 | import sys 4 | 5 | sys.path.insert( 6 | 0, 7 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 8 | ) 9 | 10 | sys.path.insert( 11 | 0, 12 | "%s/../" % os.path.join(os.path.dirname(__file__)), 13 | ) 14 | 15 | sys.path.insert( 16 | 0, 17 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)), 18 | ) 19 | -------------------------------------------------------------------------------- /preprocess/libs/geometry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
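A small sketch of get_xyz_bone_distance defined above, placing two bones at the object origin via identity dual quaternions. The (real, dual) tuple layout and the identity quaternion value below are assumptions about lab4d.utils.quat_transform, not verified against it; with a zero dual part the bone centers sit at the origin under either quaternion ordering convention.

import torch

from lab4d.utils.transforms import get_xyz_bone_distance

# two bones at the origin: real part = (assumed) identity rotation, dual part = zero translation
real = torch.tensor([[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]])  # (B=2, 4)
dual = torch.zeros(2, 4)                                           # (B=2, 4)
bone2obj = (real, dual)

xyz = torch.tensor([[0.0, 0.0, 0.0], [3.0, 4.0, 0.0]])  # two query points
dist2 = get_xyz_bone_distance(xyz, bone2obj)
print(dist2)  # tensor([[ 0.,  0.], [25., 25.]]) squared distance to each bone center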
2 | # taken from Rigidmask: https://github.com/gengshan-y/rigidmask/blob/b308b5082d09926e687c55001c20def6b0708021/utils/dydepth.py#L425 3 | import os 4 | import sys 5 | 6 | import cv2 7 | import numpy as np 8 | import trimesh 9 | 10 | from lab4d.utils.profile_utils import record_function 11 | 12 | sys.path.insert( 13 | 0, 14 | "%s/../third_party/vcnplus/" % os.path.join(os.path.dirname(__file__)), 15 | ) 16 | 17 | from flowutils.flowlib import warp_flow 18 | 19 | 20 | @record_function("compute_procrustes") 21 | def compute_procrustes_robust(pts0, pts1): 22 | """ 23 | analytical solution of R/t from correspondence 24 | pts0: N x 3 25 | pts1: N x 3 26 | """ 27 | num_samples = 2000 28 | min_samples = 10 29 | extent = (pts0.max(0) - pts0.min(0)).mean() 30 | threshold = extent * 0.05 31 | 32 | inliers = [] 33 | samples = [] 34 | idx_array = np.arange(pts0.shape[0]) 35 | for i in range(num_samples): 36 | sample = np.random.choice(idx_array, size=min_samples, replace=False) 37 | sol = compute_procrustes(pts0[sample], pts1[sample]) 38 | 39 | # evaluate inliers 40 | R, t = sol 41 | pts2 = R @ pts0.T + t[:, np.newaxis] 42 | dist = np.linalg.norm(pts2.T - pts1, 2, axis=1) 43 | inliers.append((dist < threshold).sum()) 44 | samples.append(sample) 45 | 46 | best_idx = np.argmax(np.sum(inliers, axis=0)) 47 | print("inlier_ratio: ", np.max(inliers) / pts0.shape[0]) 48 | best_sample = samples[best_idx] 49 | sol = compute_procrustes(pts0[best_sample], pts1[best_sample]) 50 | return sol 51 | 52 | 53 | @record_function("compute_procrustes") 54 | def compute_procrustes(pts0, pts1): 55 | """ 56 | analytical solution of R/t from correspondence 57 | pts0: N x 3 58 | pts1: N x 3 59 | """ 60 | if pts0.shape[0] < 10: 61 | print("Warning: too few points for procrustes. Return identity.") 62 | return np.eye(3), np.zeros(3) 63 | pts0_mean = np.mean(pts0, 0) 64 | pts1_mean = np.mean(pts1, 0) 65 | pts0_centered = pts0 - pts0_mean 66 | pts1_centered = pts1 - pts1_mean 67 | H = pts0_centered.T @ pts1_centered 68 | U, S, Vt = np.linalg.svd(H) 69 | R = Vt.T @ U.T 70 | if np.linalg.det(R) < 0: 71 | Vt[2, :] *= -1 72 | R = Vt.T @ U.T 73 | t = pts1_mean - R @ pts0_mean 74 | 75 | # pts2 = R @ pts0.T + t[:, np.newaxis] 76 | # pts2 = pts2.T 77 | # trimesh.Trimesh(pts0).export('tmp/0.obj') 78 | # trimesh.Trimesh(pts1).export('tmp/1.obj') 79 | # trimesh.Trimesh(pts2).export('tmp/2.obj') 80 | return R, t 81 | 82 | 83 | @record_function("two_frame_registration") 84 | def two_frame_registration( 85 | depth0, depth1, flow, K0, K1, mask, registration_type="procrustes" 86 | ): 87 | # prepare data 88 | shape = flow.shape[:2] 89 | x0, y0 = np.meshgrid(range(shape[1]), range(shape[0])) 90 | x0 = x0.astype(np.float32) 91 | y0 = y0.astype(np.float32) 92 | x1 = x0 + flow[:, :, 0] 93 | y1 = y0 + flow[:, :, 1] 94 | hp0 = np.stack((x0, y0, np.ones(x0.shape)), 0).reshape((3, -1)) 95 | hp1 = np.stack((x1, y1, np.ones(x0.shape)), 0).reshape((3, -1)) 96 | 97 | # use bg + valid pixels to compute R/t 98 | # valid_mask = np.logical_and(mask, flow[..., 2] > 0).flatten() 99 | valid_mask = mask.flatten() 100 | pts0 = np.linalg.inv(K0) @ hp0 * depth0.flatten() 101 | depth1_warped = warp_flow(depth1.astype(float), flow[..., :2]).flatten() 102 | pts1 = np.linalg.inv(K1) @ hp1 * depth1_warped 103 | 104 | if registration_type == "procrustes": 105 | # Procrustes 106 | valid_mask = np.logical_and(valid_mask, depth1_warped > 0) 107 | rmat, trans = compute_procrustes(pts0.T[valid_mask], pts1.T[valid_mask]) 108 | # rmat, trans = 
compute_procrustes_robust(pts0.T[valid_mask], pts1.T[valid_mask]) 109 | elif registration_type == "pnp": 110 | # PnP 111 | _, rvec, trans = cv2.solvePnP( 112 | pts0.T[valid_mask.flatten(), np.newaxis], 113 | hp1[:2].T[valid_mask.flatten(), np.newaxis], 114 | K0, 115 | 0, 116 | flags=cv2.SOLVEPNP_DLS, 117 | ) 118 | _, rvec, trans = cv2.solvePnP( 119 | pts0.T[valid_mask, np.newaxis], 120 | hp1[:2].T[valid_mask, np.newaxis], 121 | K0, 122 | 0, 123 | rvec, 124 | trans, 125 | useExtrinsicGuess=True, 126 | flags=cv2.SOLVEPNP_ITERATIVE, 127 | ) 128 | rmat = cv2.Rodrigues(rvec)[0] 129 | trans = trans[:, 0] 130 | else: 131 | raise NotImplementedError 132 | 133 | cam01 = np.eye(4) 134 | cam01[:3, :3] = rmat 135 | cam01[:3, 3] = trans 136 | return cam01 137 | -------------------------------------------------------------------------------- /preprocess/libs/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import cv2 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation as R 5 | 6 | from lab4d.utils.profile_utils import record_function 7 | 8 | 9 | @record_function("resize_to_target") 10 | def resize_to_target(flowfw, aspect_ratio=None, is_flow=False): 11 | h, w = flowfw.shape[:2] 12 | if aspect_ratio is None: 13 | factor = np.sqrt(250 * 1000 / (h * w)) 14 | th, tw = int(h * factor), int(w * factor) 15 | else: 16 | rh, rw = aspect_ratio[:2] 17 | factor = np.sqrt(250 * 1000 / (rh * rw)) 18 | th, tw = int(rh * factor), int(rw * factor) 19 | 20 | factor_h = th / h 21 | factor_w = tw / w 22 | 23 | flowfw_d = cv2.resize(flowfw, (tw, th)) 24 | 25 | if is_flow: 26 | flowfw_d[..., 0] *= factor_w 27 | flowfw_d[..., 1] *= factor_h 28 | return flowfw_d 29 | 30 | 31 | @record_function("reduce_component") 32 | def reduce_component(mask): 33 | dtype = mask.dtype 34 | nb_components, output, stats, centroids = cv2.connectedComponentsWithStats( 35 | mask.astype(np.uint8), connectivity=8 36 | ) 37 | if nb_components > 1: 38 | max_label, max_size = max( 39 | [(i, stats[i, cv2.CC_STAT_AREA]) for i in range(1, nb_components)], 40 | key=lambda x: x[1], 41 | ) 42 | mask = (output == max_label).astype(int) 43 | mask = mask.astype(dtype) 44 | return mask 45 | 46 | 47 | def robust_rot_align(rot1, rot2): 48 | """ 49 | align rot1 to rot2 using RANSAC 50 | """ 51 | in_thresh = 1.0 / 4 * np.pi # 45 deg 52 | n_samples = rot2.shape[0] 53 | rots = rot2[:, :3, :3] @ rot1[:, :3, :3].transpose(0, 2, 1) 54 | 55 | inliers = [] 56 | for i in range(n_samples): 57 | rots_aligned = rots[i : i + 1] @ rot1[:, :3, :3] 58 | dist = rots_aligned @ rot2[:, :3, :3].transpose(0, 2, 1) 59 | dist = R.from_matrix(dist).as_rotvec() 60 | dist = np.linalg.norm(dist, 2, axis=1) 61 | inliers.append((dist < in_thresh).sum()) 62 | 63 | # Convert rotation vectors back to rotation matrices 64 | best_rot = rots[np.argmax(inliers)] 65 | # print(inliers) 66 | return best_rot 67 | -------------------------------------------------------------------------------- /preprocess/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/scripts/__init__.py -------------------------------------------------------------------------------- /preprocess/scripts/camera_registration.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, 
Carnegie Mellon University. 2 | # python preprocess/scripts/camera_registration.py 2023-04-03-16-50-09-room-0000 0 3 | import glob 4 | import os 5 | import sys 6 | 7 | import cv2 8 | import numpy as np 9 | import trimesh 10 | 11 | sys.path.insert( 12 | 0, 13 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 14 | ) 15 | 16 | sys.path.insert( 17 | 0, 18 | "%s/../" % os.path.join(os.path.dirname(__file__)), 19 | ) 20 | 21 | from libs.geometry import two_frame_registration 22 | from libs.io import flow_process, read_raw 23 | from libs.utils import reduce_component 24 | 25 | from lab4d.utils.geom_utils import K2inv, K2mat 26 | from lab4d.utils.vis_utils import draw_cams 27 | 28 | 29 | def camera_registration(seqname, component_id): 30 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname 31 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir)) 32 | delta = 1 33 | crop_size = 256 34 | use_full = True 35 | registration_type = "procrustes" 36 | 37 | # get camera intrinsics 38 | raw_shape = cv2.imread(imglist[0]).shape[:2] 39 | max_l = max(raw_shape) 40 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2]) 41 | Kraw = K2mat(Kraw) 42 | 43 | cam_current = np.eye(4) # scene to camera: I, R01 I, R12 R01 I, ... 44 | cams = [cam_current] 45 | for im0idx in range(len(imglist)): 46 | if im0idx + delta >= len(imglist): 47 | continue 48 | # TODO: load croped images directly 49 | frameid0 = int(imglist[im0idx].split("/")[-1].split(".")[0]) 50 | frameid1 = int(imglist[im0idx + delta].split("/")[-1].split(".")[0]) 51 | # print("%s %d %d" % (seqname, frameid0, frameid1)) 52 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full) 53 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full) 54 | flow_process(data_dict0, data_dict1) 55 | 56 | # compute intrincs for the cropped images 57 | K0 = K2inv(data_dict0["crop2raw"]) @ Kraw 58 | K1 = K2inv(data_dict1["crop2raw"]) @ Kraw 59 | 60 | # get mask 61 | mask = data_dict0["mask"][..., 0].astype(int) == component_id 62 | if component_id > 0: 63 | # reduce the mask to the largest connected component 64 | mask = reduce_component(mask) 65 | else: 66 | # for background, additionally remove flow with low confidence 67 | mask = np.logical_and(mask, data_dict0["flow"][..., 2] > 0).flatten() 68 | cam_0_to_1 = two_frame_registration( 69 | data_dict0["depth"], 70 | data_dict1["depth"], 71 | data_dict0["flow"], 72 | K0, 73 | K1, 74 | mask, 75 | registration_type, 76 | ) 77 | cam_current = cam_0_to_1 @ cam_current 78 | cams.append(cam_current) 79 | 80 | os.makedirs(imgdir.replace("JPEGImages", "Cameras"), exist_ok=True) 81 | save_path = imgdir.replace("JPEGImages", "Cameras") 82 | # for idx, img_path in enumerate(sorted(glob.glob("%s/*.jpg" % imgdir))): 83 | # frameid = int(img_path.split("/")[-1].split(".")[0]) 84 | # campath = "%s/%05d-%02d.txt" % (save_path, frameid, component_id) 85 | # np.savetxt(campath, cams[idx]) 86 | np.save("%s/%02d.npy" % (save_path, component_id), cams) 87 | mesh_cam = draw_cams(cams) 88 | mesh_cam.export("%s/cameras-%02d.obj" % (save_path, component_id)) 89 | 90 | print("camera registration done: %s, %d" % (seqname, component_id)) 91 | 92 | 93 | if __name__ == "__main__": 94 | seqname = sys.argv[1] 95 | component_id = int(sys.argv[2]) # 0: bg, 1: fg 96 | 97 | camera_registration(seqname, component_id) 98 | -------------------------------------------------------------------------------- /preprocess/scripts/compute_diff.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/compute_diff.py database/processed/JPEGImages/Full-Resolution/cat-pikachu-0000/ database/processed/JPEGImages/Full-Resolution/2023-04-19-01-36-53-cat-pikachu-0000/ 3 | import glob 4 | import sys 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | path1 = sys.argv[1] 10 | path2 = sys.argv[2] 11 | 12 | for path1, path2 in zip( 13 | sorted(glob.glob(path1 + "/*")), sorted(glob.glob(path2 + "/*")) 14 | ): 15 | print(path1, path2) 16 | 17 | if path1.endswith(".npy"): 18 | t1 = np.load(path1).astype(np.float32) 19 | t2 = np.load(path2).astype(np.float32) 20 | elif path1.endswith(".jpg"): 21 | t1 = cv2.imread(path1).astype(np.float32) 22 | t2 = cv2.imread(path2).astype(np.float32) 23 | elif path1.endswith(".txt"): 24 | t1 = np.loadtxt(path1) 25 | t2 = np.loadtxt(path2) 26 | else: 27 | raise NotImplementedError 28 | 29 | print(np.mean(np.abs(t1 - t2))) 30 | -------------------------------------------------------------------------------- /preprocess/scripts/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/crop.py $seqname 256 1 3 | import glob 4 | import os 5 | import sys 6 | 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | sys.path.insert( 11 | 0, 12 | "%s/../" % os.path.join(os.path.dirname(__file__)), 13 | ) 14 | 15 | from libs.io import flow_process, read_raw 16 | 17 | 18 | def extract_crop(seqname, crop_size, use_full): 19 | if use_full: 20 | save_prefix = "full" 21 | else: 22 | save_prefix = "crop" 23 | save_prefix = "%s-%d" % (save_prefix, crop_size) 24 | 25 | delta_list = [1, 2, 4, 8] 26 | 27 | flowfw_list = {delta: [] for delta in delta_list} 28 | flowbw_list = {delta: [] for delta in delta_list} 29 | rgb_list = [] 30 | mask_list = [] 31 | depth_list = [] 32 | crop2raw_list = [] 33 | is_detected_list = [] 34 | 35 | imglist = sorted( 36 | glob.glob("database/processed/JPEGImages/Full-Resolution/%s/*.jpg" % seqname) 37 | ) 38 | for im0idx in tqdm(range(len(imglist))): 39 | for delta in delta_list: 40 | if im0idx % delta != 0: 41 | continue 42 | if im0idx + delta >= len(imglist): 43 | continue 44 | # print("%s %d %d" % (seqname, frameid0, frameid1)) 45 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full) 46 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full) 47 | flow_process(data_dict0, data_dict1) 48 | 49 | # save img, mask, vis2d 50 | if delta == 1: 51 | rgb_list.append(data_dict0["img"]) 52 | mask_list.append(data_dict0["mask"]) 53 | depth_list.append(data_dict0["depth"]) 54 | crop2raw_list.append(data_dict0["crop2raw"]) 55 | is_detected_list.append(data_dict0["is_detected"]) 56 | 57 | if im0idx == len(imglist) - 2: 58 | rgb_list.append(data_dict1["img"]) 59 | mask_list.append(data_dict1["mask"]) 60 | depth_list.append(data_dict1["depth"]) 61 | crop2raw_list.append(data_dict1["crop2raw"]) 62 | is_detected_list.append(data_dict1["is_detected"]) 63 | 64 | flowfw_list[delta].append(data_dict0["flow"]) 65 | flowbw_list[delta].append(data_dict1["flow"]) 66 | 67 | # save cropped data 68 | for delta in delta_list: 69 | if len(flowfw_list[delta]) == 0: 70 | continue 71 | np.save( 72 | "database/processed/FlowFW_%d/Full-Resolution/%s/%s.npy" 73 | % (delta, seqname, save_prefix), 74 | np.stack(flowfw_list[delta], 0), 75 | ) 76 | 
np.save( 77 | "database/processed/FlowBW_%d/Full-Resolution/%s/%s.npy" 78 | % (delta, seqname, save_prefix), 79 | np.stack(flowbw_list[delta], 0), 80 | ) 81 | 82 | np.save( 83 | "database/processed/JPEGImages/Full-Resolution/%s/%s.npy" 84 | % (seqname, save_prefix), 85 | np.stack(rgb_list, 0), 86 | ) 87 | np.save( 88 | "database/processed/Annotations/Full-Resolution/%s/%s.npy" 89 | % (seqname, save_prefix), 90 | np.stack(mask_list, 0), 91 | ) 92 | 93 | np.save( 94 | "database/processed/Depth/Full-Resolution/%s/%s.npy" % (seqname, save_prefix), 95 | np.stack(depth_list, 0), 96 | ) 97 | 98 | np.save( 99 | "database/processed/Annotations/Full-Resolution/%s/%s-crop2raw.npy" 100 | % (seqname, save_prefix), 101 | np.stack(crop2raw_list, 0), 102 | ) 103 | 104 | np.save( 105 | "database/processed/Annotations/Full-Resolution/%s/%s-is_detected.npy" 106 | % (seqname, save_prefix), 107 | np.stack(is_detected_list, 0), 108 | ) 109 | 110 | print("crop (size: %d, full: %d) done: %s" % (crop_size, use_full, seqname)) 111 | 112 | 113 | if __name__ == "__main__": 114 | seqname = sys.argv[1] 115 | crop_size = int(sys.argv[2]) 116 | use_full = bool(int(sys.argv[3])) 117 | 118 | extract_crop(seqname, crop_size, use_full) 119 | -------------------------------------------------------------------------------- /preprocess/scripts/depth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/depth.py 2023-03-30-21-20-57-cat-pikachu-5-0000 3 | import glob 4 | import os 5 | import sys 6 | 7 | import numpy as np 8 | import torch 9 | import trimesh 10 | from PIL import Image 11 | 12 | sys.path.insert( 13 | 0, 14 | "%s/../" % os.path.join(os.path.dirname(__file__)), 15 | ) 16 | 17 | 18 | from libs.utils import resize_to_target 19 | 20 | 21 | def depth2pts(depth): 22 | Kmat = np.eye(3) 23 | Kmat[0, 0] = depth.shape[0] 24 | Kmat[1, 1] = depth.shape[0] 25 | Kmat[0, 2] = depth.shape[1] / 2 26 | Kmat[1, 2] = depth.shape[0] / 2 27 | 28 | xy = np.meshgrid(np.arange(depth.shape[1]), np.arange(depth.shape[0])) 29 | hxy = np.stack( 30 | [xy[0].flatten(), xy[1].flatten(), np.ones_like(xy[0].flatten())], axis=0 31 | ) 32 | hxy = np.linalg.inv(Kmat) @ hxy 33 | xyz = hxy * depth.flatten() 34 | return xyz.T 35 | 36 | 37 | def extract_depth(seqname): 38 | image_dir = "database/processed/JPEGImages/Full-Resolution/%s/" % seqname 39 | output_dir = image_dir.replace("JPEGImages", "Depth") 40 | 41 | # torch.hub.help( 42 | # "intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True 43 | # ) # Triggers fresh download of MiDaS repo 44 | 45 | model_zoe_nk = torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True) 46 | zoe = model_zoe_nk.to("cuda") 47 | 48 | os.makedirs(output_dir, exist_ok=True) 49 | for img_path in sorted(glob.glob(f"{image_dir}/*.jpg")): 50 | # print(img_path) 51 | image = Image.open(img_path) 52 | depth = zoe.infer_pil(image) 53 | depth = resize_to_target(depth, is_flow=False).astype(np.float16) 54 | out_path = f"{output_dir}/{os.path.basename(img_path).replace('.jpg', '.npy')}" 55 | np.save(out_path, depth) 56 | # pts = depth2pts(depth) 57 | 58 | print("zoe depth done: ", seqname) 59 | 60 | 61 | if __name__ == "__main__": 62 | seqname = sys.argv[1] 63 | 64 | extract_depth(seqname) 65 | -------------------------------------------------------------------------------- /preprocess/scripts/download.py: -------------------------------------------------------------------------------- 1 | # Usage: 2 | 
# python preprocess/scripts/download.py 3 | import os, sys 4 | import shutil 5 | import subprocess 6 | import zipfile 7 | 8 | 9 | def download_seq(seqname): 10 | datadir = os.path.join("database", "raw", seqname) 11 | if os.path.exists(datadir): 12 | print(f"Deleting existing directory: {datadir}") 13 | shutil.rmtree(datadir) 14 | 15 | url_path = os.path.join("database", "vid_data", f"{seqname}.txt") 16 | if not os.path.exists(url_path): 17 | # specify the folder of videos 18 | print(f"URL file does not exist: {url_path}") 19 | # ask for user input 20 | vid_path = "video_folder" 21 | while not os.path.isdir(vid_path): 22 | vid_path = input("Enter the path to video folder:") 23 | # copy folder to datadir 24 | print(f"Copying from directory: {vid_path} to {datadir}") 25 | shutil.copytree(vid_path, datadir) 26 | else: 27 | with open(url_path, "r") as f: 28 | url = f.read().strip() 29 | 30 | # Download the video 31 | print(f"Downloading from URL: {url}") 32 | tmp_zip = "tmp-%s.zip" % seqname 33 | subprocess.run( 34 | ["wget", url, "-O", tmp_zip], 35 | stdout=subprocess.DEVNULL, 36 | stderr=subprocess.DEVNULL, 37 | ) 38 | 39 | # Unzip the file 40 | os.makedirs(datadir) 41 | print(f"Unzipping to directory: {datadir}") 42 | with zipfile.ZipFile(tmp_zip, "r") as zip_ref: 43 | zip_ref.extractall(datadir) 44 | 45 | # Remove the zip file 46 | os.remove(tmp_zip) 47 | 48 | 49 | def main(): 50 | # Get sequence name from command line arguments 51 | if len(sys.argv) > 1: 52 | seqname = sys.argv[1] 53 | download_seq(seqname) 54 | else: 55 | print("Usage: python preprocess/scripts/download.py ") 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /preprocess/scripts/extract_frames.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/extract_frames.py database/raw/cat-1015/10415567.mp4 tmp/ 3 | import sys 4 | 5 | import imageio 6 | import numpy as np 7 | 8 | 9 | def extract_frames(in_path, out_path): 10 | print("extracting frames: ", in_path) 11 | # Open the video file 12 | reader = imageio.get_reader(in_path) 13 | 14 | # Find the first non-black frame 15 | for i, im in enumerate(reader): 16 | if np.any(im > 0): 17 | start_frame = i 18 | break 19 | 20 | # Write the video starting from the first non-black frame 21 | count = 0 22 | for i, im in enumerate(reader): 23 | if i >= start_frame: 24 | imageio.imsave("%s/%05d.jpg" % (out_path, count), im) 25 | count += 1 26 | 27 | 28 | if __name__ == "__main__": 29 | in_path = sys.argv[1] 30 | out_path = sys.argv[2] 31 | extract_frames(in_path, out_path) 32 | -------------------------------------------------------------------------------- /preprocess/scripts/tsdf_fusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
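The intrinsics bookkeeping used in tsdf_fusion.py (below) and camera_registration.py (above), where Kraw is built from the raw image size and K2inv(crop2raw) @ Kraw recovers the intrinsics of the cropped frame, can be sketched as follows. The [fx, fy, px, py] ordering and the crop2raw scale/offset layout are assumptions inferred from how these helpers are called, not from their definitions.

import numpy as np

from lab4d.utils.geom_utils import K2inv, K2mat

# raw-image intrinsics, following the convention above: fx = fy = max(H, W), principal point at the center
raw_h, raw_w = 1080, 1920
Kraw = K2mat(np.array([1920.0, 1920.0, raw_w / 2.0, raw_h / 2.0]))  # assumed 3x3 output

# assumed crop2raw layout: [scale_x, scale_y, offset_x, offset_y] mapping crop pixels to raw pixels
crop2raw = np.array([raw_w / 256.0, raw_h / 256.0, 0.0, 0.0])
K_crop = K2inv(crop2raw) @ Kraw  # intrinsics of the 256x256 resized frame
print(K_crop)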
2 | # python preprocess/scripts/tsdf_fusion.py 2023-04-03-18-02-32-cat-pikachu-5-0000 0 3 | import glob 4 | import os 5 | import sys 6 | 7 | import cv2 8 | import numpy as np 9 | import trimesh 10 | 11 | sys.path.insert( 12 | 0, 13 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)), 14 | ) 15 | 16 | 17 | sys.path.insert( 18 | 0, 19 | "%s/../" % os.path.join(os.path.dirname(__file__)), 20 | ) 21 | 22 | sys.path.insert( 23 | 0, 24 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 25 | ) 26 | 27 | import fusion 28 | from libs.io import read_frame_data 29 | 30 | from lab4d.utils.geom_utils import K2inv, K2mat 31 | from lab4d.utils.vis_utils import draw_cams 32 | 33 | # def read_cam(imgpath, component_id): 34 | # campath = imgpath.replace("JPEGImages", "Cameras").replace( 35 | # ".jpg", "-%02d.txt" % component_id 36 | # ) 37 | # scene2cam = np.loadtxt(campath) 38 | # cam2scene = np.linalg.inv(scene2cam) 39 | # return cam2scene 40 | 41 | 42 | def tsdf_fusion(seqname, component_id, crop_size=256, use_full=True): 43 | # load rgb/depth 44 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname 45 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir)) 46 | 47 | # camera path 48 | save_path = imgdir.replace("JPEGImages", "Cameras") 49 | save_path = "%s/%02d.npy" % (save_path, component_id) 50 | cams_prev = np.load(save_path) 51 | 52 | # get camera intrinsics 53 | raw_shape = cv2.imread(imglist[0]).shape[:2] 54 | max_l = max(raw_shape) 55 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2]) 56 | Kraw = K2mat(Kraw) 57 | 58 | # initialize volume 59 | vol_bnds = np.zeros((3, 2)) 60 | for it, imgpath in enumerate(imglist[:-1]): 61 | rgb, depth, mask, crop2raw = read_frame_data( 62 | imgpath, crop_size, use_full, component_id 63 | ) 64 | K0 = K2inv(crop2raw) @ Kraw 65 | # cam2scene = read_cam(imgpath, component_id) 66 | cam2scene = np.linalg.inv(cams_prev[it]) 67 | depth[~mask] = 0 68 | depth[depth > 10] = 0 69 | view_frust_pts = fusion.get_view_frustum(depth, K0, cam2scene) 70 | vol_bnds[:, 0] = np.minimum(vol_bnds[:, 0], np.amin(view_frust_pts, axis=1)) 71 | vol_bnds[:, 1] = np.maximum(vol_bnds[:, 1], np.amax(view_frust_pts, axis=1)) 72 | tsdf_vol = fusion.TSDFVolume(vol_bnds, voxel_size=0.2, use_gpu=False) 73 | 74 | # fusion 75 | for it, imgpath in enumerate(imglist[:-1]): 76 | # print(imgpath) 77 | rgb, depth, mask, crop2raw = read_frame_data( 78 | imgpath, crop_size, use_full, component_id 79 | ) 80 | K0 = K2inv(crop2raw) @ Kraw 81 | depth[~mask] = 0 82 | # cam2scene = read_cam(imgpath, component_id) 83 | cam2scene = np.linalg.inv(cams_prev[it]) 84 | tsdf_vol.integrate(rgb, depth, K0, cam2scene, obs_weight=1.0) 85 | 86 | save_path = imgdir.replace("JPEGImages", "Cameras") 87 | # get mesh, compute center 88 | rt = tsdf_vol.get_mesh() 89 | verts, faces = rt[0], rt[1] 90 | mesh = trimesh.Trimesh(verts, faces) 91 | aabb = mesh.bounds 92 | center = aabb.mean(0) 93 | mesh.vertices = mesh.vertices - center[None] 94 | mesh.export("%s/mesh-%02d-centered.obj" % (save_path, component_id)) 95 | 96 | # save cameras 97 | cams = [] 98 | for it, imgpath in enumerate(imglist): 99 | # campath = imgpath.replace("JPEGImages", "Cameras").replace( 100 | # ".jpg", "-%02d.txt" % component_id 101 | # ) 102 | # cam = np.loadtxt(campath) 103 | # shift the camera in the scene space 104 | cam = np.linalg.inv(cams_prev[it]) 105 | cam[:3, 3] -= center 106 | cam = np.linalg.inv(cam) 107 | # np.savetxt(campath, cam) 108 | cams.append(cam) 109 | np.save("%s/%02d.npy" % (save_path, 
component_id), cams) 110 | mesh_cam = draw_cams(cams) 111 | mesh_cam.export("%s/cameras-%02d-centered.obj" % (save_path, component_id)) 112 | 113 | print("tsdf fusion done: %s, %d" % (seqname, component_id)) 114 | 115 | 116 | if __name__ == "__main__": 117 | seqname = sys.argv[1] 118 | component_id = int(sys.argv[2]) 119 | 120 | tsdf_fusion(seqname, component_id) 121 | -------------------------------------------------------------------------------- /preprocess/scripts/write_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/write_config.py ${vidname} 3 | import configparser 4 | import glob 5 | import os 6 | import sys 7 | 8 | import cv2 9 | 10 | 11 | def write_config(collection_name): 12 | min_nframe = 8 13 | imgroot = "database/processed/JPEGImages/Full-Resolution/" 14 | 15 | config = configparser.ConfigParser() 16 | config["data"] = { 17 | "init_frame": "0", 18 | "end_frame": "-1", 19 | } 20 | 21 | seqname_all = sorted( 22 | glob.glob("%s/%s-[0-9][0-9][0-9][0-9]*" % (imgroot, collection_name)) 23 | ) 24 | total_vid = 0 25 | for i, seqname in enumerate(seqname_all): 26 | seqname = seqname.split("/")[-1] 27 | img = cv2.imread("%s/%s/00000.jpg" % (imgroot, seqname), 0) 28 | num_fr = len(glob.glob("%s/%s/*.jpg" % (imgroot, seqname))) 29 | if num_fr < min_nframe: 30 | continue 31 | 32 | fl = max(img.shape) 33 | px = img.shape[1] // 2 34 | py = img.shape[0] // 2 35 | camtxt = [fl, fl, px, py] 36 | config["data_%d" % total_vid] = { 37 | "ks": " ".join([str(i) for i in camtxt]), 38 | "shape": " ".join([str(img.shape[0]), str(img.shape[1])]), 39 | "img_path": "database/processed/JPEGImages/Full-Resolution/%s/" % seqname, 40 | } 41 | total_vid += 1 42 | 43 | os.makedirs("database/configs", exist_ok=True) 44 | with open("database/configs/%s.config" % collection_name, "w") as configfile: 45 | config.write(configfile) 46 | 47 | 48 | if __name__ == "__main__": 49 | collection_name = sys.argv[1] 50 | 51 | write_config(collection_name) 52 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/compute_flow.sh: -------------------------------------------------------------------------------- 1 | # bash compute_flow.sh $seqname 2 | seqname=$1 3 | 4 | if [[ $seqname ]]; 5 | then 6 | array=(1 2 4 8) 7 | for i in "${array[@]}" 8 | do 9 | python compute_flow.py --datapath ../../../database/processed/JPEGImages/Full-Resolution/$seqname/ --loadmodel ./vcn_rob.pth --dframe $i 10 | done 11 | fi 12 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/flowutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/flowutils/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/frame_filter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
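For reference, the .config files produced by write_config.py above can be read back with configparser; a minimal sketch (the collection name below is just an example):

import configparser

config = configparser.ConfigParser()
config.read("database/configs/cat-pikachu.config")  # written by write_config.py

for section in config.sections():
    if not section.startswith("data_"):
        continue  # skip the global "data" section
    fx, fy, px, py = map(float, config[section]["ks"].split())
    height, width = map(int, config[section]["shape"].split())
    print(section, config[section]["img_path"], (fx, fy, px, py), (height, width))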
2 | import sys 3 | import os 4 | 5 | # insert path of current file 6 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 7 | 8 | import cv2 9 | import pdb 10 | import argparse 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.parallel 15 | import torch.backends.cudnn as cudnn 16 | import torch.utils.data 17 | import glob 18 | import shutil 19 | 20 | from models.VCNplus import VCN 21 | from models.inference import ( 22 | load_eval_checkpoint, 23 | modify_flow_module, 24 | process_flow_input, 25 | make_disc_aux, 26 | ) 27 | 28 | cudnn.benchmark = True 29 | 30 | 31 | def frame_filter(seqname, outdir): 32 | print("Filtering frames for %s" % (seqname)) 33 | model_path = "./preprocess/third_party/vcnplus/vcn_rob.pth" 34 | maxdisp = 256 # maxium disparity. Only affect the coarsest cost volume size 35 | fac = ( 36 | 1 # controls the shape of search grid. Only affect the coarse cost volume size 37 | ) 38 | flow_threshold = 0.05 # flow threshold that controls frame skipping 39 | max_frames = 500 # maximum number of frames to keep (to avoid oom in tracking etc.) 40 | 41 | # construct model 42 | model = load_eval_checkpoint(model_path, maxdisp=maxdisp, fac=fac) 43 | 44 | # input and output images 45 | img_paths = sorted( 46 | glob.glob("%s/JPEGImagesRaw/Full-Resolution/%s/*.jpg" % (outdir, seqname)) 47 | ) 48 | output_path = "%s/JPEGImages/Full-Resolution/%s/" % (outdir, seqname) 49 | output_idxs = [] 50 | 51 | # load image 0 and compute resize ratio 52 | img0_o = cv2.imread(img_paths[0])[:, :, ::-1] 53 | output_idxs.append(0) 54 | 55 | input_size = img0_o.shape 56 | inp_h, inp_w, _ = input_size 57 | max_res = 300 * 300 58 | res_fac = np.sqrt(max_res / (inp_h * inp_w)) 59 | max_h = int(np.ceil(inp_h * res_fac / 64) * 64) 60 | max_w = int(np.ceil(inp_w * res_fac / 64) * 64) 61 | 62 | # modify flow module according to input size 63 | modify_flow_module(model, max_h, max_w) 64 | model.eval() 65 | 66 | # find adjacent frames with sufficiently large flow 67 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w) 68 | for jnx in range(1, len(img_paths)): 69 | img1_o = cv2.imread(img_paths[jnx])[:, :, ::-1] 70 | img1, img1_noaug = process_flow_input(img1_o, model.mean_R, max_h, max_w) 71 | 72 | # forward inference 73 | disc_aux = make_disc_aux(img0_noaug, max_h, max_w, input_size) 74 | with torch.no_grad(): 75 | img01 = torch.cat([img0, img1], dim=0) 76 | flowfw, _, _, _ = model(img01, disc_aux) # 1, 2, max_h, max_w 77 | 78 | flowfw[:, 0:1] /= max_w 79 | flowfw[:, 1:2] /= max_h 80 | 81 | maxflow = torch.max(torch.norm(flowfw[0], p=2, dim=0)).item() 82 | # print(jnx, "%.06f" % (maxflow)) 83 | 84 | if maxflow > flow_threshold: 85 | output_idxs.append(jnx) 86 | img0_o = img1_o 87 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w) 88 | 89 | if len(output_idxs) >= max_frames: 90 | break 91 | 92 | # copy selected frames to output 93 | if len(output_idxs) > 8: 94 | os.system("mkdir -p %s" % (output_path)) 95 | for output_file in [f"{jnx:05d}.jpg" for jnx in output_idxs]: 96 | shutil.copy2( 97 | f"{outdir}/JPEGImagesRaw/Full-Resolution/{seqname}/{output_file}", 98 | output_path, 99 | ) 100 | 101 | print("frame filtering done: %s" % seqname) 102 | else: 103 | print("lack of motion, ignored: %s" % seqname) 104 | 105 | 106 | if __name__ == "__main__": 107 | if len(sys.argv) != 3: 108 | print(f"Usage: python {sys.argv[0]} ") 109 | print(f"Example: python {sys.argv[0]} cat-pikachu-0-0000 'database/processed/'") 110 | exit() 
111 | seqname = sys.argv[1] 112 | outdir = sys.argv[2] 113 | frame_filter(seqname, outdir) 114 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/models/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/det.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv,num_input): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. 
{}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/det_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/.gitignore: -------------------------------------------------------------------------------- 1 | DCNv2/build 2 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn_v2 import * 2 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | // modified from the CUDA version for CPU use by Daniel K. Suhendro 64 | 65 | #ifndef DCN_V2_IM2COL_CPU 66 | #define DCN_V2_IM2COL_CPU 67 | 68 | #ifdef __cplusplus 69 | extern "C" 70 | { 71 | #endif 72 | 73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 88 | const int batch_size, const int channels, const int height_im, const int width_im, 89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 91 | const int dilation_h, const int dilation_w, 92 | const int deformable_group, 93 | float *grad_offset, float *grad_mask); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int 
kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | 
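For orientation, the vision.cpp bindings above expose the DCNv2 operators through a compiled extension module named _ext (see the DCNv2 setup.py below, which names the extension and selects the CPU or CUDA sources). The following is only a rough smoke-test sketch, not part of the repository: the argument order mirrors the dcn_v2_cpu_forward / dcn_v2_cuda_forward declarations in the vision.h headers, and the tensor shapes and the dispatch behaviour of dcn_v2_forward are assumptions; in practice the operators are consumed through the wrappers re-exported by DCN/__init__.py.

import torch
import _ext  # compiled DCNv2 extension; assumes ./make.sh (python setup.py build develop) has been run

N, C_in, C_out, K = 1, 8, 16, 3  # batch size, in/out channels, kernel size (assumed values)
H = W = 32
deformable_group = 1

x = torch.randn(N, C_in, H, W).cuda()
weight = torch.randn(C_out, C_in, K, K).cuda()
bias = torch.zeros(C_out).cuda()
# offset holds 2*K*K*deformable_group channels (x/y shift per kernel tap);
# mask holds K*K*deformable_group modulation weights
offset = torch.zeros(N, 2 * K * K * deformable_group, H, W).cuda()
mask = torch.ones(N, K * K * deformable_group, H, W).cuda()

out = _ext.dcn_v2_forward(
    x, weight, bias, offset, mask,
    K, K,  # kernel_h, kernel_w
    1, 1,  # stride_h, stride_w
    1, 1,  # pad_h, pad_w
    1, 1,  # dilation_h, dilation_w
    deformable_group,
)
print(out.shape)  # (1, 16, 32, 32)

With zero offsets and all-ones masks, modulated deformable convolution degenerates to a standard 3x3 convolution, which makes this a convenient first check that the extension built correctly.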
-------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.X 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python testcpu.py # run examples and gradient check on cpu 7 | python testcuda.py # run examples and gradient check on gpu 8 | ``` 9 | ### Note 10 | Now the master branch is for pytorch 1.x, you can switch back to pytorch 0.4 with, 11 | ```bash 12 | git checkout pytorch_0.4 13 | ``` 14 | 15 | ### Known Issues: 16 | 17 | - [x] Gradient check w.r.t offset (solved) 18 | - [ ] Backward is not reentrant (minor) 19 | 20 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 21 | 22 | Update: all gradient check passes with **double** precision. 23 | 24 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 25 | float `<1e-15` for double), 26 | so it may not be a serious problem (?) 27 | 28 | Please post an issue or PR if you have any comments. 
29 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | def get_extensions(): 19 | this_dir = os.path.dirname(os.path.abspath(__file__)) 20 | extensions_dir = os.path.join(this_dir, "DCN", "src") 21 | 22 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 23 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 24 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 25 | 26 | #os.environ["CC"] = "g++" 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | extra_compile_args = {'cxx': ['-std=c++14']} 30 | define_macros = [] 31 | 32 | 33 | #if torch.cuda.is_available() and CUDA_HOME is not None: 34 | if torch.cuda.is_available(): 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | else: 45 | #raise NotImplementedError('Cuda is not available') 46 | pass 47 | 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | include_dirs = [extensions_dir] 51 | ext_modules = [ 52 | extension( 53 | "_ext", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | return ext_modules 61 | 62 | setup( 63 | name="DCNv2", 64 | version="0.1", 65 | author="charlesshang", 66 | url="https://github.com/charlesshang/DCNv2", 67 | description="deformable convolutional networks", 68 | packages=find_packages(exclude=("configs", "tests",)), 69 | # install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/viewpoint/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml" 2 | MODEL: 3 | ROI_DENSEPOSE_HEAD: 4 | CSE: 5 | EMBEDDERS: 6 | "smpl_27554": 7 | TYPE: vertex_feature 8 | NUM_VERTICES: 27554 9 | FEATURE_DIM: 256 10 | FEATURES_TRAINABLE: False 11 | IS_TRAINABLE: True 12 | INIT_FILE: 
"https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_smpl_27554_256.pkl" 13 | DATASETS: 14 | TRAIN: 15 | - "densepose_coco_2014_train_cse" 16 | - "densepose_coco_2014_valminusminival_cse" 17 | TEST: 18 | - "densepose_coco_2014_minival_cse" 19 | CLASS_TO_MESH_NAME_MAPPING: 20 | "0": "smpl_27554" 21 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | VERSION: 2 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | ANCHOR_GENERATOR: 11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 13 | RPN: 14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 17 | # Detectron1 uses 2000 proposals per-batch, 18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 20 | POST_NMS_TOPK_TRAIN: 1000 21 | POST_NMS_TOPK_TEST: 1000 22 | 23 | DENSEPOSE_ON: True 24 | ROI_HEADS: 25 | NAME: "DensePoseROIHeads" 26 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 27 | NUM_CLASSES: 1 28 | ROI_BOX_HEAD: 29 | NAME: "FastRCNNConvFCHead" 30 | NUM_FC: 2 31 | POOLER_RESOLUTION: 7 32 | POOLER_SAMPLING_RATIO: 2 33 | POOLER_TYPE: "ROIAlign" 34 | ROI_DENSEPOSE_HEAD: 35 | NAME: "DensePoseV1ConvXHead" 36 | POOLER_TYPE: "ROIAlign" 37 | NUM_COARSE_SEGM_CHANNELS: 2 38 | PREDICTOR_NAME: "DensePoseEmbeddingPredictor" 39 | LOSS_NAME: "DensePoseCseLoss" 40 | CSE: 41 | # embedding loss, possible values: 42 | # - "EmbeddingLoss" 43 | # - "SoftEmbeddingLoss" 44 | EMBED_LOSS_NAME: "EmbeddingLoss" 45 | SOLVER: 46 | IMS_PER_BATCH: 16 47 | BASE_LR: 0.01 48 | STEPS: (60000, 80000) 49 | MAX_ITER: 90000 50 | WARMUP_FACTOR: 0.1 51 | CLIP_GRADIENTS: 52 | CLIP_TYPE: norm 53 | CLIP_VALUE: 1.0 54 | ENABLED: true 55 | NORM_TYPE: 2.0 56 | INPUT: 57 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 58 | DENSEPOSE_EVALUATION: 59 | TYPE: cse 60 | STORAGE: file 61 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | ROI_DENSEPOSE_HEAD: 7 | NAME: "DensePoseDeepLabHead" 8 | CSE: 9 | EMBED_LOSS_NAME: "SoftEmbeddingLoss" 10 | SOLVER: 11 | MAX_ITER: 130000 12 | STEPS: (100000, 120000) 13 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | ROI_HEADS: 7 | NUM_CLASSES: 1 8 | ROI_DENSEPOSE_HEAD: 9 | NAME: "DensePoseV1ConvXHead" 
10 | COARSE_SEGM_TRAINED_BY_MASKS: True 11 | CSE: 12 | EMBED_LOSS_NAME: "SoftEmbeddingLoss" 13 | EMBEDDING_DIST_GAUSS_SIGMA: 0.1 14 | GEODESIC_DIST_GAUSS_SIGMA: 0.1 15 | EMBEDDERS: 16 | "cat_5001": 17 | TYPE: vertex_feature 18 | NUM_VERTICES: 5001 19 | FEATURE_DIM: 256 20 | FEATURES_TRAINABLE: False 21 | IS_TRAINABLE: True 22 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl" 23 | "dog_5002": 24 | TYPE: vertex_feature 25 | NUM_VERTICES: 5002 26 | FEATURE_DIM: 256 27 | FEATURES_TRAINABLE: False 28 | IS_TRAINABLE: True 29 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl" 30 | "sheep_5004": 31 | TYPE: vertex_feature 32 | NUM_VERTICES: 5004 33 | FEATURE_DIM: 256 34 | FEATURES_TRAINABLE: False 35 | IS_TRAINABLE: True 36 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" 37 | "horse_5004": 38 | TYPE: vertex_feature 39 | NUM_VERTICES: 5004 40 | FEATURE_DIM: 256 41 | FEATURES_TRAINABLE: False 42 | IS_TRAINABLE: True 43 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" 44 | "zebra_5002": 45 | TYPE: vertex_feature 46 | NUM_VERTICES: 5002 47 | FEATURE_DIM: 256 48 | FEATURES_TRAINABLE: False 49 | IS_TRAINABLE: True 50 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" 51 | "giraffe_5002": 52 | TYPE: vertex_feature 53 | NUM_VERTICES: 5002 54 | FEATURE_DIM: 256 55 | FEATURES_TRAINABLE: False 56 | IS_TRAINABLE: True 57 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" 58 | "elephant_5002": 59 | TYPE: vertex_feature 60 | NUM_VERTICES: 5002 61 | FEATURE_DIM: 256 62 | FEATURES_TRAINABLE: False 63 | IS_TRAINABLE: True 64 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" 65 | "cow_5002": 66 | TYPE: vertex_feature 67 | NUM_VERTICES: 5002 68 | FEATURE_DIM: 256 69 | FEATURES_TRAINABLE: False 70 | IS_TRAINABLE: True 71 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" 72 | "bear_4936": 73 | TYPE: vertex_feature 74 | NUM_VERTICES: 4936 75 | FEATURE_DIM: 256 76 | FEATURES_TRAINABLE: False 77 | IS_TRAINABLE: True 78 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" 79 | DATASETS: 80 | TRAIN: 81 | - "densepose_lvis_v1_ds1_train_v1" 82 | TEST: 83 | - "densepose_lvis_v1_ds1_val_v1" 84 | WHITELISTED_CATEGORIES: 85 | "densepose_lvis_v1_ds1_train_v1": 86 | - 943 # sheep 87 | - 1202 # zebra 88 | - 569 # horse 89 | - 496 # giraffe 90 | - 422 # elephant 91 | - 80 # cow 92 | - 76 # bear 93 | - 225 # cat 94 | - 378 # dog 95 | "densepose_lvis_v1_ds1_val_v1": 96 | - 943 # sheep 97 | - 1202 # zebra 98 | - 569 # horse 99 | - 496 # giraffe 100 | - 422 # elephant 101 | - 80 # cow 102 | - 76 # bear 103 | - 225 # cat 104 | - 378 # dog 105 | CATEGORY_MAPS: 106 | "densepose_lvis_v1_ds1_train_v1": 107 | "1202": 943 # zebra -> sheep 108 | "569": 943 # horse -> sheep 109 | "496": 943 # giraffe -> sheep 110 | "422": 943 # elephant -> sheep 111 | "80": 943 # cow -> sheep 112 | "76": 943 # bear -> sheep 113 | "225": 943 # cat -> sheep 114 | "378": 943 # dog -> sheep 115 | "densepose_lvis_v1_ds1_val_v1": 116 | "1202": 943 # zebra -> sheep 117 | "569": 943 # horse -> sheep 118 | "496": 943 # giraffe -> sheep 119 | "422": 943 # elephant -> sheep 120 | "80": 943 # cow -> sheep 121 | "76": 943 # bear -> sheep 122 | "225": 943 # cat -> sheep 123 | "378": 943 # dog -> sheep 124 | 
CLASS_TO_MESH_NAME_MAPPING: 125 | # Note: different classes are mapped to a single class 126 | # mesh is chosen based on GT data, so this is just some 127 | # value which has no particular meaning 128 | "0": "sheep_5004" 129 | SOLVER: 130 | MAX_ITER: 4000 131 | STEPS: (3000, 3500) 132 | DENSEPOSE_EVALUATION: 133 | EVALUATE_MESH_ALIGNMENT: True 134 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/cselib.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 3 | import cv2 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | import pdb 8 | 9 | from detectron2.config import get_cfg 10 | from detectron2.modeling import build_model 11 | from detectron2.checkpoint import DetectionCheckpointer 12 | from detectron2.structures import Boxes as create_boxes 13 | 14 | import sys 15 | 16 | sys.path.insert(0, "preprocess/third_party/detectron2/projects/DensePose/") 17 | from densepose import add_densepose_config 18 | 19 | 20 | # load model 21 | def create_cse(is_human): 22 | if is_human: 23 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml" 24 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x/250713061/model_final_1d3314.pkl" 25 | else: 26 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml" 27 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k/253498611/model_final_6d69b7.pkl" 28 | 29 | cfg = get_cfg() 30 | add_densepose_config(cfg) 31 | cfg.merge_from_file(dp_config_path) 32 | cfg.MODEL.WEIGHTS = dp_weight_path 33 | model = build_model(cfg) # returns a torch.nn.Module 34 | DetectionCheckpointer(model).load( 35 | cfg.MODEL.WEIGHTS 36 | ) # load a file, usually from cfg.MODEL.WEIGHTS 37 | return model 38 | 39 | 40 | def preprocess_image(image, mask): 41 | h, w, _ = image.shape 42 | 43 | # resize 44 | max_size = 1333 45 | if h > w: 46 | h_rszd, w_rszd = max_size, max_size * w // h 47 | else: 48 | h_rszd, w_rszd = max_size * h // w, max_size 49 | image = cv2.resize(image, (w_rszd, h_rszd)) 50 | mask = cv2.resize(mask.astype(float), (w_rszd, h_rszd)).astype(np.uint8) 51 | 52 | # pad 53 | h_pad = (1 + h_rszd // 32) * 32 54 | w_pad = (1 + w_rszd // 32) * 32 55 | image_tmp = np.zeros((h_pad, w_pad, 3)).astype(np.uint8) 56 | mask_tmp = np.zeros((h_pad, w_pad)).astype(np.uint8) 57 | image_tmp[:h_rszd, :w_rszd] = image 58 | mask_tmp[:h_rszd, :w_rszd] = mask 59 | image = image_tmp 60 | mask = mask_tmp 61 | 62 | # preprocess image and box 63 | indices = np.where(mask > 0) 64 | xid = indices[1] 65 | yid = indices[0] 66 | center = ((xid.max() + xid.min()) // 2, (yid.max() + yid.min()) // 2) 67 | length = ( 68 | int((xid.max() - xid.min()) * 1.0 // 2), 69 | int((yid.max() - yid.min()) * 1.0 // 2), 70 | ) 71 | bbox = [center[0] - length[0], center[1] - length[1], length[0] * 2, length[1] * 2] 72 | bbox = [ 73 | max(0, bbox[0]), 74 | max(0, bbox[1]), 75 | min(w_pad, bbox[0] + bbox[2]), 76 | min(h_pad, bbox[1] + bbox[3]), 77 | ] 78 | bbox_raw = bbox.copy() # bbox in the raw image coordinate 79 | bbox_raw[0] *= w / w_rszd 80 | bbox_raw[2] *= w / w_rszd 81 | bbox_raw[1] *= h / h_rszd 82 | bbox_raw[3] *= h / h_rszd 83 
| return image, mask, bbox, bbox_raw 84 | 85 | 86 | def run_cse(model, image, mask): 87 | image, mask, bbox, bbox_raw = preprocess_image(image, mask) 88 | 89 | image = torch.Tensor(image).cuda().permute(2, 0, 1)[None] 90 | image = torch.stack([(x - model.pixel_mean) / model.pixel_std for x in image]) 91 | pred_boxes = torch.Tensor([bbox]).cuda() 92 | pred_boxes = create_boxes(pred_boxes) 93 | 94 | # inference 95 | model.eval() 96 | with torch.no_grad(): 97 | features = model.backbone(image) 98 | features = [features[f] for f in model.roi_heads.in_features] 99 | features = [model.roi_heads.decoder(features)] 100 | features_dp = model.roi_heads.densepose_pooler(features, [pred_boxes]) 101 | densepose_head_outputs = model.roi_heads.densepose_head(features_dp) 102 | densepose_predictor_outputs = model.roi_heads.densepose_predictor( 103 | densepose_head_outputs 104 | ) 105 | coarse_segm_resized = densepose_predictor_outputs.coarse_segm[0] 106 | embedding_resized = densepose_predictor_outputs.embedding[0] 107 | 108 | # use input mask 109 | x, y, xx, yy = bbox 110 | mask_box = mask[y:yy, x:xx] 111 | mask_box = torch.Tensor(mask_box).cuda()[None, None] 112 | mask_box = ( 113 | F.interpolate(mask_box, coarse_segm_resized.shape[1:3], mode="bilinear")[0, 0] 114 | > 0 115 | ) 116 | 117 | # output embedding 118 | embedding = embedding_resized # size does not matter for a image code 119 | embedding = embedding * mask_box.float()[None] 120 | 121 | # output dp2raw 122 | bbox_raw = np.asarray(bbox_raw) 123 | dp2raw = np.concatenate( 124 | [(bbox_raw[2:] - bbox_raw[:2]) / embedding.shape[1], bbox_raw[:2]] 125 | ) 126 | return embedding, dp2raw 127 | -------------------------------------------------------------------------------- /scripts/create_collage.py: -------------------------------------------------------------------------------- 1 | # python scripts/create_collage.py --testdir logdir/penguin-fg-skel-b120/ --prefix renderings_0002 2 | 3 | from moviepy.editor import clips_array, VideoFileClip, vfx 4 | import sys, os 5 | import numpy as np 6 | import pdb 7 | import glob 8 | import argparse 9 | import itertools 10 | 11 | parser = argparse.ArgumentParser(description="combine results into a collage") 12 | parser.add_argument("--testdir", default="", help="path to test dir") 13 | parser.add_argument( 14 | "--prefix", default="renderings_ref_", type=str, help="what data to combine" 15 | ) 16 | args = parser.parse_args() 17 | 18 | 19 | def main(): 20 | save_path = "%s/collage.mp4" % args.testdir 21 | 22 | video_list = [] 23 | for sub_seq in sorted(glob.glob("%s/%s*" % (args.testdir, args.prefix))): 24 | path_list = [] 25 | path_list.append("%s/ref/ref_rgb.mp4" % sub_seq) 26 | path_list.append("%s/ref/rgb.mp4" % sub_seq) 27 | path_list.append("%s/ref/xyz.mp4" % sub_seq) 28 | path_list.append("%s/rot-0-360/rgb.mp4" % sub_seq) 29 | path_list.append("%s/rot-0-360/xyz.mp4" % sub_seq) 30 | 31 | # make sure these exist 32 | if np.sum([os.path.exists(path) for path in path_list]) == len(path_list): 33 | print("found %s" % sub_seq) 34 | video_list.append([VideoFileClip(path) for path in path_list]) 35 | 36 | if len(video_list) == 0: 37 | print("no video found") 38 | return 39 | 40 | # align in time 41 | max_duration = max( 42 | [clip.duration for clip in list(itertools.chain.from_iterable(video_list))] 43 | ) 44 | for i, clip_list in enumerate(video_list): 45 | for j, clip in enumerate(clip_list): 46 | video_list[i][j] = clip.resize(width=512).fx( 47 | vfx.freeze, t="end", total_duration=max_duration, padding_end=0.5 
48 | ) 49 | 50 | final_clip = clips_array(video_list) 51 | final_clip.write_videofile(save_path) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /scripts/download_unzip.sh: -------------------------------------------------------------------------------- 1 | # bash scripts/download_unzip.sh "$url" 2 | url=$1 3 | rootdir=$PWD 4 | 5 | filename=tmp-`date +"%Y-%m-%d-%H-%M-%S"`.zip 6 | wget $url -O $filename 7 | unzip $filename 8 | rm $filename 9 | -------------------------------------------------------------------------------- /scripts/install-deps.sh: -------------------------------------------------------------------------------- 1 | mim install mmcv 2 | 3 | (cd lab4d/third_party/quaternion && CUDA_HOME=$CONDA_PREFIX pip install .) 4 | 5 | mkdir ./preprocess/third_party/Track-Anything/checkpoints; wget "https://www.dropbox.com/scl/fi/o86gx6zn27b494m937n2i/E2FGVI-HQ-CVPR22.pth?rlkey=j15ue65ryy8jb1mvn2htf0jtk&st=t4zyl5jk&dl=0" -O ./preprocess/third_party/Track-Anything/checkpoints/E2FGVI-HQ-CVPR22.pth 6 | 7 | wget https://www.dropbox.com/s/bgsodsnnbxdoza3/vcn_rob.pth -O ./preprocess/third_party/vcnplus/vcn_rob.pth 8 | 9 | wget https://www.dropbox.com/s/51cjzo8zgz966t5/human.pth -O preprocess/third_party/viewpoint/human.pth 10 | 11 | wget https://www.dropbox.com/s/1464pg6c9ce8rve/quad.pth -O preprocess/third_party/viewpoint/quad.pth 12 | -------------------------------------------------------------------------------- /scripts/render_intermediate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python scripts/render_intermediate.py --testdir logdir/human-48-category-comp/ 3 | import sys, os 4 | import pdb 5 | 6 | os.environ["PYOPENGL_PLATFORM"] = "egl" # opengl seems to only work with TPU 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 8 | 9 | import glob 10 | import numpy as np 11 | import cv2 12 | import argparse 13 | import trimesh 14 | import pyrender 15 | from pyrender import IntrinsicsCamera, Mesh, Node, Scene, OffscreenRenderer 16 | import matplotlib 17 | import tqdm 18 | 19 | from lab4d.utils.io import save_vid 20 | 21 | cmap = matplotlib.colormaps.get_cmap("cool") 22 | 23 | parser = argparse.ArgumentParser(description="script to render cameras over epochs") 24 | parser.add_argument("--testdir", default="", help="path to test dir") 25 | parser.add_argument( 26 | "--data_class", default="fg", type=str, help="which data to render, {fg, bg}" 27 | ) 28 | args = parser.parse_args() 29 | 30 | img_size = 1024 31 | 32 | # renderer 33 | r = OffscreenRenderer(img_size, img_size) 34 | cam = IntrinsicsCamera(img_size, img_size, img_size / 2, img_size / 2) 35 | # light 36 | direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=3.0) 37 | light_pose = np.asarray( 38 | [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=float 39 | ) 40 | # cv to gl coords 41 | cam_pose = -np.eye(4) 42 | cam_pose[0, 0] = 1 43 | cam_pose[-1, -1] = 1 44 | rtmat = np.eye(4) 45 | # object to camera transforms 46 | rtmat[:3, :3] = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0] # bev 47 | 48 | 49 | def main(): 50 | # io 51 | path_list = [ 52 | i for i in glob.glob("%s/*-%s-proxy.obj" % (args.testdir, args.data_class)) 53 | ] 54 | if len(path_list) == 0: 55 | print("no mesh found in %s for %s" % (args.testdir, args.data_class)) 56 | return 57 | path_list = 
sorted(path_list, key=lambda x: int(x.split("/")[-1].split("-")[0])) 58 | outdir = "%s/renderings_proxy" % args.testdir 59 | os.makedirs(outdir, exist_ok=True) 60 | 61 | mesh_dict = {} 62 | aabb_min = np.asarray([np.inf, np.inf, np.inf]) 63 | aabb_max = np.asarray([-np.inf, -np.inf, -np.inf]) 64 | for mesh_path in path_list: 65 | batch_idx = int(mesh_path.split("/")[-1].split("-")[0]) 66 | mesh_obj = trimesh.load(mesh_path) 67 | mesh_dict[batch_idx] = mesh_obj 68 | 69 | # update aabb 70 | aabb_min = np.minimum(aabb_min, mesh_obj.bounds[0]) 71 | aabb_max = np.maximum(aabb_max, mesh_obj.bounds[1]) 72 | 73 | # set camera translation 74 | rtmat[2, 3] = max(aabb_max - aabb_min) * 1.2 75 | 76 | # render 77 | frames = [] 78 | for batch_idx, mesh_obj in tqdm.tqdm(mesh_dict.items()): 79 | scene = Scene(ambient_light=0.4 * np.asarray([1.0, 1.0, 1.0, 1.0])) 80 | 81 | # add object / camera 82 | mesh_obj.apply_transform(rtmat) 83 | scene.add_node(Node(mesh=Mesh.from_trimesh(mesh_obj))) 84 | 85 | # camera 86 | scene.add(cam, pose=cam_pose) 87 | 88 | # light 89 | scene.add(direc_l, pose=light_pose) 90 | 91 | # render 92 | color, depth = r.render( 93 | scene, 94 | flags=pyrender.RenderFlags.SHADOWS_DIRECTIONAL 95 | | pyrender.RenderFlags.SKIP_CULL_FACES, 96 | ) 97 | # add text 98 | color = color.astype(np.uint8) 99 | color = cv2.putText( 100 | color, 101 | "batch: %02d" % batch_idx, 102 | (30, 50), 103 | cv2.FONT_HERSHEY_SIMPLEX, 104 | 2, 105 | (256, 0, 0), 106 | 2, 107 | ) 108 | frames.append(color) 109 | 110 | save_vid("%s/fg" % outdir, frames, suffix=".mp4", upsample_frame=-1) 111 | print("saved to %s/fg.mp4" % outdir) 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /scripts/run_crop_all.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # TODO: use config file to go over seqs 3 | # python scripts/run_crop_all.py cat-pikachu 4 | import os 5 | import sys 6 | import glob 7 | import multiprocessing 8 | from functools import partial 9 | 10 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 11 | from preprocess.scripts.crop import extract_crop 12 | 13 | os.environ["OMP_NUM_THREADS"] = "1" 14 | 15 | vidname = sys.argv[1] 16 | path = ( 17 | "database/processed/JPEGImages/Full-Resolution/%s*" % vidname 18 | ) # path to the images 19 | 20 | 21 | def process_seqname(seqname, size, region): 22 | extract_crop(seqname, size, region) 23 | 24 | 25 | if __name__ == "__main__": 26 | pool = multiprocessing.Pool(processes=32) # use up to 32 processes 27 | 28 | for seqname in sorted(glob.glob(path)): 29 | seqname = seqname.split("/")[-1] 30 | # we'll use a partial function to bind the common arguments 31 | func = partial(process_seqname, seqname, 256) 32 | pool.apply_async(func, args=(0,)) 33 | pool.apply_async(func, args=(1,)) 34 | 35 | pool.close() 36 | pool.join() # wait for all processes to finish 37 | -------------------------------------------------------------------------------- /scripts/run_rendering_parallel.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # generate three visualizations (reference view, bev, turntable) rendering, mesh export in parallel 3 | # python scripts/run_rendering_parallel.py logdir/dog-98-category-comp/opts.log 0-2 0,1,2 4 | import sys 5 | import subprocess 6 | 7 | # Set the flagfile. 
8 | flagfile = sys.argv[1] 9 | 10 | # Set the range of inst_ids. 11 | start_inst_id, end_inst_id = map(int, sys.argv[2].split("-")) 12 | id_list = list(range(start_inst_id, end_inst_id + 1)) 13 | 14 | # Set the devices id 15 | dev_list = sys.argv[3].split(",") 16 | dev_list = list(map(int, dev_list)) 17 | num_devices = len(dev_list) 18 | id_per_device = len(id_list) // num_devices 19 | 20 | print( 21 | "rendering videos", 22 | id_list, 23 | "on devices", 24 | dev_list, 25 | ) 26 | 27 | # render proxy over rounds 28 | logdir = flagfile.rsplit("/", 1)[0] 29 | subprocess.Popen( 30 | f"python scripts/render_intermediate.py --testdir {logdir}/", shell=True 31 | ) 32 | 33 | # Loop over each device. 34 | for dev_id, device in enumerate(dev_list): 35 | # Initialize an empty command list for this device. 36 | command_for_device = [] 37 | 38 | # Loop over the inst_ids assigned to this device. 39 | if dev_id == num_devices - 1: 40 | assigned_ids = id_list[dev_id * id_per_device :] 41 | else: 42 | assigned_ids = id_list[dev_id * id_per_device : (dev_id + 1) * id_per_device] 43 | for inst_id in assigned_ids: 44 | # Add the command for this inst_id to the device's command list. 45 | command_for_device.append( 46 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --data_prefix full" 47 | ) 48 | command_for_device.append( 49 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint rot-0-360" 50 | ) 51 | # command_for_device.append( 52 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint bev-90" 53 | # ) 54 | # command_for_device.append( 55 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/export.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id}" 56 | # ) 57 | 58 | # Add a delay between commands to avoid overloading the device. 59 | command_for_device.append("sleep 1") 60 | 61 | # Join all commands for this device into a single string. 62 | command_str = "; ".join(command_for_device) 63 | 64 | # Start a screen session for this device, executing the device's command string. 
65 | subprocess.Popen( 66 | f'screen -S render-{device}-{",".join(str(i) for i in assigned_ids)} -d -m bash -c "{command_str}"', 67 | shell=True, 68 | ) 69 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | # bash scripts/train.sh lab4d/train.py 0 --seqname 2023-03-26-00-39-17-cat-pikachu 2 | main_func=$1 3 | dev=$2 4 | add_args=${*: 3:$#-1} 5 | 6 | ngpu=`echo $dev | awk -F '[\t,]' '{print NF-1}'` 7 | ngpu=$(($ngpu + 1 )) 8 | echo "using "$ngpu "gpus" 9 | 10 | # assign random port 11 | # https://github.com/pytorch/pytorch/issues/73320 12 | CUDA_VISIBLE_DEVICES=$dev torchrun \ 13 | --nproc_per_node $ngpu --nnodes 1 --rdzv_backend c10d --rdzv_endpoint localhost:0 \ 14 | $main_func \ 15 | --ngpu $ngpu \ 16 | $add_args 17 | -------------------------------------------------------------------------------- /scripts/zip_dataset.py: -------------------------------------------------------------------------------- 1 | # Description: Zip the dataset for easy sharing 2 | # Usage: python scripts/zip_dataset.py 3 | import configparser 4 | import os 5 | import sys 6 | 7 | cwd = os.getcwd() 8 | if cwd not in sys.path: 9 | sys.path.insert(0, cwd) 10 | 11 | from preprocess.libs.io import run_bash_command 12 | 13 | vidname = sys.argv[1] 14 | 15 | args = [] 16 | config = configparser.RawConfigParser() 17 | config.read("database/configs/%s.config" % vidname) 18 | for vidid in range(len(config.sections()) - 1): 19 | seqname = config.get("data_%d" % vidid, "img_path").strip("/").split("/")[-1] 20 | run_bash_command( 21 | f"zip {vidname}.zip -r database/processed/*/Full-Resolution/{seqname}" 22 | ) 23 | 24 | run_bash_command(f"zip {vidname}.zip database/configs/{vidname}.config") 25 | -------------------------------------------------------------------------------- /scripts/zip_logdir.py: -------------------------------------------------------------------------------- 1 | # Description: Zip the logdir for easy sharing 2 | # Usage: python scripts/zip_logdir 3 | import os 4 | import pdb 5 | import sys 6 | 7 | cwd = os.getcwd() 8 | if cwd not in sys.path: 9 | sys.path.insert(0, cwd) 10 | 11 | from preprocess.libs.io import run_bash_command 12 | 13 | logpath = sys.argv[1] 14 | 15 | logname = logpath.strip("/").split("/")[-1] 16 | print(logname) 17 | 18 | run_bash_command(f"zip log-{logname}.zip {logpath}/*") 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="lab4d", 5 | packages=find_packages(), 6 | ) 7 | --------------------------------------------------------------------------------
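As a closing aside, scripts/zip_dataset.py above loops over len(config.sections()) - 1 entries, which implies one [data_i] section per video plus one extra shared section in database/configs/<vidname>.config. The sketch below is hypothetical and only illustrates the fields that script actually reads; the name of the shared section and the example sequence name are assumptions, not taken from the repository.

import configparser

# Build an in-memory stand-in for database/configs/<vidname>.config
config = configparser.RawConfigParser()
config["data"] = {}  # assumed shared section; zip_dataset.py's "- 1" skips exactly one such entry
config["data_0"] = {
    "img_path": "database/processed/JPEGImages/Full-Resolution/cat-pikachu-0000/"
}

# Mirror the seqname extraction performed by scripts/zip_dataset.py
seqname = config.get("data_0", "img_path").strip("/").split("/")[-1]
print(seqname)  # -> cat-pikachu-0000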