├── .github
├── ISSUE_TEMPLATE
│ ├── bug-issue-report.md
│ └── feature_request.md
└── workflows
│ └── static.yml
├── .gitignore
├── .gitmodules
├── .vscode
└── settings.json
├── CITATION.cff
├── LICENSE
├── README.md
├── browser
├── app.py
└── templates
│ └── index.html
├── database
├── mesh-templates
│ └── cat-pikachu-remeshed.obj
└── vid_data
│ ├── ama-bouncing-4v.txt
│ ├── ama-bouncing.txt
│ ├── ama-samba-4v.txt
│ ├── ama-samba.txt
│ ├── car-turnaround-2.txt
│ ├── car-turnaround.txt
│ ├── cat-85.txt
│ ├── cat-pikachu-0.txt
│ ├── cat-pikachu.txt
│ ├── dog-98.txt
│ ├── dog-robolounge.txt
│ ├── human-48.txt
│ ├── human-cap.txt
│ ├── room.txt
│ ├── shiba-haru.txt
│ ├── squirrel-baseball.txt
│ └── squirrel.txt
├── docs
├── .gitignore
├── Makefile
├── README.md
├── env_min.yml
├── make.bat
├── source
│ ├── _static
│ │ ├── images
│ │ │ ├── camera_annot.png
│ │ │ └── visflo-00081.jpg
│ │ ├── media_resized
│ │ │ ├── car-turnaround-2-anno.mp4
│ │ │ ├── car-turnaround-2-proxy.mp4
│ │ │ ├── car-turnaround-2.mp4
│ │ │ ├── car-turnaround-2_collage.mp4
│ │ │ ├── car-turnaround_bev-120-xyz.mp4
│ │ │ ├── car-turnaround_bev-120.mp4
│ │ │ ├── car-turnaround_bev.mp4
│ │ │ ├── car-turnaround_ref-xyz.mp4
│ │ │ ├── car-turnaround_ref.mp4
│ │ │ ├── car-turnaround_turntable-120-xyz.mp4
│ │ │ ├── car-turnaround_turntable-120.mp4
│ │ │ ├── car-turnaround_turntable.mp4
│ │ │ ├── cat-85-80_ref-xyz.mp4
│ │ │ ├── cat-85.mp4
│ │ │ ├── cat-pikachu-0-comp_bev-xyz.mp4
│ │ │ ├── cat-pikachu-0-comp_bev.mp4
│ │ │ ├── cat-pikachu-0-proxy.mp4
│ │ │ ├── cat-pikachu-0.mp4
│ │ │ ├── cat-pikachu-0_collage.mp4
│ │ │ ├── cat-pikachu-0_ref-xyz.mp4
│ │ │ ├── cat-pikachu-0_ref.mp4
│ │ │ ├── cat-pikachu-0_turntable-xyz.mp4
│ │ │ ├── cat-pikachu-0_turntable.mp4
│ │ │ ├── cat-pikachu-7.mp4
│ │ │ ├── cat-pikachu-7_collage.mp4
│ │ │ ├── cat-pikachu-8_ref-xyz.mp4
│ │ │ ├── cat-pikachu-8_ref.mp4
│ │ │ ├── cat-pikachu-8_turntable-120-xyz.mp4
│ │ │ ├── cat-pikachu-8_turntable-120.mp4
│ │ │ ├── comp_elev.mp4
│ │ │ ├── dog-98-0_ref-xyz.mp4
│ │ │ ├── dog-98.mp4
│ │ │ ├── dog-dualrig-fgbg000-xyz.mp4
│ │ │ ├── dog-dualrig-fgbg000.mp4
│ │ │ ├── dog-robolounge_collage.mp4
│ │ │ ├── finch.mp4
│ │ │ ├── finch_collage.mp4
│ │ │ ├── finch_ref-xyz.mp4
│ │ │ ├── human-48-0_ref-xyz.mp4
│ │ │ ├── human-48-0_ref.mp4
│ │ │ ├── human-48-0_turntable-120-xyz.mp4
│ │ │ ├── human-48-0_turntable-120.mp4
│ │ │ ├── human-48-reanimate-8-xyz.mp4
│ │ │ ├── human-48-reanimate-8.mp4
│ │ │ ├── human-48.mp4
│ │ │ ├── human-cap-3-xyz.mp4
│ │ │ ├── human-cap-3.mp4
│ │ │ ├── human-cap-3_collage.mp4
│ │ │ ├── penguin-1-xyz.mp4
│ │ │ ├── penguin-2_collage.mp4
│ │ │ ├── penguin.mp4
│ │ │ ├── shiba-haru-6.mp4
│ │ │ ├── shiba-haru-7-xyz.mp4
│ │ │ ├── shiba-haru-7.mp4
│ │ │ ├── shiba-haru-7_collage.mp4
│ │ │ ├── squirrel-xyz.mp4
│ │ │ ├── squirrel.mp4
│ │ │ └── squirrel_collage.mp4
│ │ └── meshes
│ │ │ ├── car-turnaround-2-canonical-prealign.obj
│ │ │ ├── car-turnaround-2-canonical.obj
│ │ │ ├── car-turnaround-2-mesh.obj
│ │ │ ├── car-turnaround-2-proxy.obj
│ │ │ ├── cat-pikachu-0-bone.obj
│ │ │ ├── cat-pikachu-0-mesh.obj
│ │ │ ├── cat-pikachu-0-proxy.obj
│ │ │ ├── cat-pikachu-bone.obj
│ │ │ ├── cat-pikachu-mesh.obj
│ │ │ ├── cat-pikachu-proxy.obj
│ │ │ ├── human-48-0-mesh-0000.obj
│ │ │ ├── human-48-0-mesh.obj
│ │ │ ├── human-48-bone.obj
│ │ │ └── human-48-proxy.obj
│ ├── api_docs
│ │ ├── index.rst
│ │ └── modules.rst
│ ├── conf.py
│ ├── data_models.rst
│ ├── get_started
│ │ └── index.rst
│ ├── index.rst
│ ├── obj2glb.py
│ ├── qa.rst
│ ├── resize_vids.py
│ └── tutorials
│ │ ├── arbitrary_video.rst
│ │ ├── category_model.rst
│ │ ├── index.rst
│ │ ├── multi_video_cat.rst
│ │ ├── preprocessing.rst
│ │ └── single_video_cat.rst
└── template
│ ├── module.rst_t
│ ├── package.rst_t
│ └── toc.rst_t
├── environment.yml
├── lab4d
├── __init__.py
├── config.py
├── config_omega.py
├── dataloader
│ ├── __init__.py
│ ├── data_utils.py
│ └── vidloader.py
├── engine
│ ├── __init__.py
│ ├── model.py
│ ├── train_utils.py
│ └── trainer.py
├── export.py
├── nnutils
│ ├── __init__.py
│ ├── appearance.py
│ ├── base.py
│ ├── deformable.py
│ ├── embedding.py
│ ├── feature.py
│ ├── intrinsics.py
│ ├── multifields.py
│ ├── nerf.py
│ ├── pose.py
│ ├── skinning.py
│ ├── time.py
│ ├── visibility.py
│ └── warping.py
├── reanimate.py
├── render.py
├── tests
│ ├── hat_map.py
│ ├── test_gpu_map.py
│ ├── test_ops.py
│ └── utils.py
├── third_party
│ ├── nvp.py
│ └── quaternion
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── add_gcc_cuda.sh
│ │ ├── backend.py
│ │ ├── mat3x3.py
│ │ ├── quaternion.py
│ │ ├── setup.py
│ │ └── src
│ │ ├── bindings.cpp
│ │ ├── matinv.cu
│ │ ├── matinv.h
│ │ ├── quaternion.cu
│ │ └── quaternion.h
├── train.py
└── utils
│ ├── __init__.py
│ ├── cam_utils.py
│ ├── camera_utils.py
│ ├── decorator.py
│ ├── geom_utils.py
│ ├── gpu_utils.py
│ ├── io.py
│ ├── loss_utils.py
│ ├── numpy_utils.py
│ ├── profile_utils.py
│ ├── quat_transform.py
│ ├── render_utils.py
│ ├── skel_utils.py
│ ├── torch_utils.py
│ ├── transforms.py
│ └── vis_utils.py
├── media
├── logo.png
└── teaser.gif
├── preprocess
├── __init__.py
├── libs
│ ├── __init__.py
│ ├── geometry.py
│ ├── io.py
│ ├── torch_models.py
│ └── utils.py
├── scripts
│ ├── __init__.py
│ ├── camera_registration.py
│ ├── canonical_registration.py
│ ├── compute_diff.py
│ ├── crop.py
│ ├── depth.py
│ ├── download.py
│ ├── extract_dinov2.py
│ ├── extract_frames.py
│ ├── manual_cameras.py
│ ├── tsdf_fusion.py
│ └── write_config.py
└── third_party
│ ├── fusion.py
│ ├── vcnplus
│ ├── compute_flow.py
│ ├── compute_flow.sh
│ ├── flowutils
│ │ ├── __init__.py
│ │ └── flowlib.py
│ ├── frame_filter.py
│ └── models
│ │ ├── VCNplus.py
│ │ ├── __init__.py
│ │ ├── conv4d.py
│ │ ├── det.py
│ │ ├── det_losses.py
│ │ ├── det_utils.py
│ │ ├── feature_extraction.py
│ │ ├── inference.py
│ │ ├── networks
│ │ ├── .gitignore
│ │ ├── DCNv2
│ │ │ ├── .gitignore
│ │ │ ├── DCN
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dcn_v2.py
│ │ │ │ ├── src
│ │ │ │ │ ├── cpu
│ │ │ │ │ │ ├── dcn_v2_cpu.cpp
│ │ │ │ │ │ ├── dcn_v2_im2col_cpu.cpp
│ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp
│ │ │ │ │ │ └── vision.h
│ │ │ │ │ ├── cuda
│ │ │ │ │ │ ├── dcn_v2_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu
│ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h
│ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu
│ │ │ │ │ │ └── vision.h
│ │ │ │ │ ├── dcn_v2.h
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── testcpu.py
│ │ │ │ └── testcuda.py
│ │ │ ├── LICENSE
│ │ │ ├── README.md
│ │ │ ├── make.sh
│ │ │ └── setup.py
│ │ ├── dlav0.py
│ │ ├── large_hourglass.py
│ │ ├── msra_resnet.py
│ │ ├── pose_dla_dcn.py
│ │ └── resnet_dcn.py
│ │ └── submodule.py
│ └── viewpoint
│ ├── __init__.py
│ ├── configs
│ └── cse
│ │ ├── Base-DensePose-RCNN-FPN-Human.yaml
│ │ ├── Base-DensePose-RCNN-FPN.yaml
│ │ ├── densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml
│ │ └── densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml
│ ├── cselib.py
│ └── dp_viewpoint.py
├── scripts
├── create_collage.py
├── download_unzip.sh
├── install-deps.sh
├── render_intermediate.py
├── run_crop_all.py
├── run_preprocess.py
├── run_rendering_parallel.py
├── train.sh
├── zip_dataset.py
└── zip_logdir.py
└── setup.py
/.github/ISSUE_TEMPLATE/bug-issue-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug/issue report
3 | about: Issues running the code / bug report to help us improve
4 | title: "[Bug/issue]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the issue is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/static.yml:
--------------------------------------------------------------------------------
1 | name: Build Sphinx docs and Deploy to GitHub Pages
2 |
3 | on:
4 |   push:
5 |     branches: ["main"]
6 |   workflow_dispatch:
7 |
8 | permissions:
9 |   contents: read
10 |   pages: write
11 |   id-token: write
12 |
13 | concurrency:
14 |   group: "pages"
15 |   cancel-in-progress: false
16 |
17 | jobs:
18 |   build-deploy:
19 |     runs-on: ubuntu-latest
20 |     steps:
21 |       - name: Checkout
22 |         uses: actions/checkout@v3
23 |         with:
24 |           submodules: recursive  # Ensures submodules are checked out
25 |
26 |       - name: Update Submodules
27 |         run: |
28 |           cd ./docs
29 |           git submodule update --init --recursive
30 |
31 |       - name: Setup Miniconda
32 |         uses: conda-incubator/setup-miniconda@v2
33 |         with:
34 |           python-version: 3.9
35 |           mamba-version: "*"
36 |           channels: conda-forge,defaults
37 |           channel-priority: true
38 |           activate-environment: lab4d
39 |           environment-file: docs/env_min.yml
40 |
41 |       - name: Build Docs
42 |         shell: bash -l {0}
43 |         run: |
44 |           conda activate lab4d
45 |           cd ./docs
46 |           sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/
47 |           python source/obj2glb.py
48 |           make html
49 |
50 |       - name: Setup Pages
51 |         uses: actions/configure-pages@v3
52 |
53 |       - name: Upload artifact
54 |         uses: actions/upload-pages-artifact@v1
55 |         with:
56 |           # Upload the pages
57 |           path: './docs/build/html'
58 |
59 |       - name: Deploy to GitHub Pages
60 |         id: deployment
61 |         uses: actions/deploy-pages@v2
62 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | projects
2 | viewer
3 | run.sh
4 | run-long.sh
5 | /database/processed
6 | /database/configs
7 | /database/raw
8 | /logdir
9 | /tmp
10 |
11 | lab4d.egg-info
12 | __pycache__/
13 | *.pth
14 | *.ckpt
15 |
16 | preprocess/third_party/vcnplus/vcn_rob.pth
17 | preprocess/third_party/viewpoint/human.pth
18 | preprocess/third_party/viewpoint/quad.pth
19 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "preprocess/third_party/detectron2"]
2 | path = preprocess/third_party/detectron2
3 | url = https://github.com/facebookresearch/detectron2
4 | ignore = dirty
5 | [submodule "preprocess/third_party/Track-Anything"]
6 | path = preprocess/third_party/Track-Anything
7 | url = https://github.com/gengshan-y/Track-Anything
8 | [submodule "docs/pytorch_sphinx_theme"]
9 | path = docs/pytorch_sphinx_theme
10 | url = https://github.com/gengshan-y/pytorch_sphinx_theme
11 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files.watcherExclude": {
3 |     "**/*.npy": true,
4 |     "**/*.jpg": true,
5 |     "**/*.mp4": true,
6 |     "**/.git/objects/**": true,
7 |     "**/.git/subtree-cache/**": true,
8 |     "**/node_modules/*/**": true,
9 |     "**/*.log": true,
10 |     "database/processed/**": true,
11 |     "logdir/**": true,
12 |     "tmp/**": true,
13 |   },
14 |   "[python]": {
15 |     "editor.defaultFormatter": "ms-python.black-formatter"
16 |   },
17 |   "python.formatting.provider": "none",
18 |   "liveServer.settings.root": "docs/build/"
19 | }
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: Lab4d - A framework for in-the-wild 4D reconstruction from monocular videos
3 | message: 'If you use this software, please cite it as below.'
4 | type: software
5 | authors:
6 |   - family-names: Yang
7 |     given-names: Gengshan
8 |   - family-names: Tan
9 |     given-names: Jeff
10 |   - family-names: Lyons
11 |     given-names: Alex
12 |   - family-names: Peri
13 |     given-names: Neehar
14 |   - family-names: Ramanan
15 |     given-names: Deva
16 | url: 'https://github.com/lab4d-org/lab4d'
17 | license: MIT
18 | version: 0.0.0
19 | date-released: '2023-06-30'
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Gengshan Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | # Lab4D
8 | **[[Docs & Tutorials](https://lab4d-org.github.io/lab4d/)]**
9 | **[[Data & Checkpoints](https://lab4d-org.github.io/lab4d/data_models.html)]**
10 |
11 | *This is an alpha release and the APIs are subject to change. Please provide feedback and report bugs via github issues. Thank you for your support.*
12 |
13 | ## About
14 | **Lab4D** is a framework for 4D reconstruction from monocular videos. The software is licensed under the MIT license.
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | ## TODOs
23 | - [ ] web viewer (see [PPR branch](https://github.com/gengshan-y/ppr))
24 | - [ ] evaluation (see [PPR branch](https://github.com/gengshan-y/ppr)) and benchmarks
25 | - [ ] multi-view reconstruction
26 | - [ ] feedforward models (see [DASR](https://github.com/jefftan969/dasr))
27 |
28 | ## Acknowledgement
29 | - Our pre-processing pipeline is built upon the following open-sourced repos:
30 |     - Segmentation: [Track-Anything](https://github.com/gaomingqi/Track-Anything), [Grounding-DINO](https://github.com/IDEA-Research/GroundingDINO)
31 |     - Feature & correspondence: [DensePose-CSE](https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/projects/DensePose/doc/DENSEPOSE_CSE.md), [DINOv2](https://github.com/facebookresearch/dinov2), [VCNPlus](https://github.com/gengshan-y/rigidmask)
32 |     - Depth: [ZoeDepth](https://github.com/isl-org/ZoeDepth)
33 |     - Camera: [BANMo-viewpoint](https://github.com/facebookresearch/banmo)
34 | - We use [dqtorch](https://github.com/MightyChaos/dqtorch) for efficient rotation operations
35 | - We thank [@mjlbach](https://github.com/mjlbach), [@alexanderbergman7](https://github.com/alexanderbergman7), and [@terrancewang](https://github.com/terrancewang) for testing and feedback
36 | - We thank [@jasonyzhang](https://github.com/jasonyzhang), [@MightyChaos](https://github.com/MightyChaos), [@JudyYe](https://github.com/JudyYe), and [@andrewsonga](https://github.com/andrewsonga) for feedback
37 |
38 | If you use this project for your research, please consider citing the following papers.
39 |
40 | For building deformable object models, cite:
41 |
42 |
43 | ```
44 | @inproceedings{yang2022banmo,
45 | title={BANMo: Building Animatable 3D Neural Models from Many Casual Videos},
46 | author={Yang, Gengshan and Vo, Minh and Neverova, Natalia and Ramanan, Deva and Vedaldi, Andrea and Joo, Hanbyul},
47 | booktitle = {CVPR},
48 | year={2022}
49 | }
50 | ```
51 |
52 |
53 | For building category body and pose models, cite:
54 |
55 |
56 | ```
57 | @inproceedings{yang2023rac,
58 | title={Reconstructing Animatable Categories from Videos},
59 | author={Yang, Gengshan and Wang, Chaoyang and Reddy, N. Dinesh and Ramanan, Deva},
60 | booktitle = {CVPR},
61 | year={2023}
62 | }
63 | ```
64 |
65 |
66 | For object-scene reconstruction and extreme view synthesis, cite:
67 |
68 |
69 | ```
70 | @article{song2023totalrecon,
71 | title={Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis},
72 | author={Song, Chonghyuk and Yang, Gengshan and Deng, Kangle and Zhu, Jun-Yan and Ramanan, Deva},
73 | journal={arXiv},
74 | year={2023}
75 | }
76 | ```
77 |
78 |
79 | For training feed-forward video/image shape and pose estimators, cite:
80 |
81 |
82 | ```
83 | @inproceedings{tan2023distilling,
84 | title={Distilling Neural Fields for Real-Time Articulated Shape Reconstruction},
85 | author={Tan, Jeff and Yang, Gengshan and Ramanan, Deva},
86 | booktitle={CVPR},
87 | year={2023}
88 | }
89 | ```
90 |
91 |
92 | For the human-48 dataset, cite:
93 |
94 |
95 |
96 | ```
97 | @incollection{vlasic2008articulated,
98 | title={Articulated mesh animation from multi-view silhouettes},
99 | author={Vlasic, Daniel and Baran, Ilya and Matusik, Wojciech and Popovi{\'c}, Jovan},
100 | booktitle={Acm Siggraph 2008 papers},
101 | pages={1--9},
102 | year={2008}
103 | }
104 | @article{xu2018monoperfcap,
105 | title={Monoperfcap: Human performance capture from monocular video},
106 | author={Xu, Weipeng and Chatterjee, Avishek and Zollh{\"o}fer, Michael and Rhodin, Helge and Mehta, Dushyant and Seidel, Hans-Peter and Theobalt, Christian},
107 | journal={ACM Transactions on Graphics (ToG)},
108 | volume={37},
109 | number={2},
110 | pages={1--15},
111 | year={2018},
112 | publisher={ACM New York, NY, USA}
113 | }
114 | @inproceedings{perazzi2016benchmark,
115 | title={A benchmark dataset and evaluation methodology for video object segmentation},
116 | author={Perazzi, Federico and Pont-Tuset, Jordi and McWilliams, Brian and Van Gool, Luc and Gross, Markus and Sorkine-Hornung, Alexander},
117 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
118 | pages={724--732},
119 | year={2016}
120 | }
121 | ```
122 |
123 |
--------------------------------------------------------------------------------
/browser/app.py:
--------------------------------------------------------------------------------
1 | # WIP by Gengshan Yang
2 | # python browser/app.py 'database/processed/Annotations/Full-Resolution/cat-85-*/vis.mp4'
3 | # python browser/app.py 'logdir/dog-98-category-comp/renderings_00*/xyz.mp4'
4 | # or python browser/app.py and type in string
5 | from flask import Flask, render_template, request, send_from_directory
6 | import os
7 | import sys
8 | import glob
9 |
10 | app = Flask(__name__)
11 |
12 |
13 | def get_files(path):
14 |     matched_files = sorted(glob.glob(path))
15 |     return matched_files
16 |
17 |
18 | @app.route("/", methods=["GET", "POST"])
19 | def index():
20 |     files = []
21 |     if request.method == "POST":
22 |         path = request.form.get("path")
23 |
24 |     elif len(sys.argv) > 1:
25 |         path = sys.argv[1]
26 |     else:
27 |         path = ""
28 |     files = get_files(path)
29 |     return render_template("index.html", files=files)
30 |
31 |
32 | @app.route("/logdir/<path:filename>", methods=["GET"])
33 | def get_logdir_file(filename):
34 |     return send_from_directory(os.getcwd(), filename)
35 |
36 |
37 | @app.route("/database/<path:filename>", methods=["GET"])
38 | def get_database_file(filename):
39 |     return send_from_directory(os.getcwd(), filename)
40 |
41 |
42 | if __name__ == "__main__":
43 |     app.run(debug=True)
44 |
--------------------------------------------------------------------------------
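A minimal sketch (not part of the repo) of exercising the data browser above from Python once it is running: `python browser/app.py` starts Flask on its default port 5000, and the `path` form field matches `request.form.get("path")` in `browser/app.py`. The `requests` package and the example glob pattern are assumptions for illustration.

```
import requests  # third-party HTTP client, assumed installed

# POST a glob pattern to the index route defined in browser/app.py;
# the server globs the pattern and renders the matched files.
resp = requests.post(
    "http://127.0.0.1:5000/",
    data={"path": "logdir/dog-98-category-comp/renderings_00*/xyz.mp4"},
)
print(resp.status_code)               # 200 when the server is reachable
print("Matched Files" in resp.text)   # the template lists matches under this heading
```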
/browser/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Data Browser
6 |
73 |
74 |
75 |
76 | Data Browser
77 |
84 | Matched Files
85 |
86 | {% for file in files %}
87 |
88 | {% if file.endswith('.mp4') %}
89 |
90 |
91 |
92 | {% elif file.endswith('.jpg') %}
93 |
94 | {% else %}
95 | {{ file }}
96 | {% endif %}
97 |
98 | {% endfor %}
99 |
100 |
101 |
102 |
137 |
138 |
139 |
140 |
--------------------------------------------------------------------------------
/database/vid_data/ama-bouncing-4v.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/8b1krs9hcyvk0z0/AAAG5wle5F98KERiDHUJilUMa?dl=0
2 |
--------------------------------------------------------------------------------
/database/vid_data/ama-bouncing.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/9a90bd0p6hacqiv/AADFZOFpdsFzpGiPQqvvCsVDa?dl=0
2 |
--------------------------------------------------------------------------------
/database/vid_data/ama-samba-4v.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/4b3dx6id0ncoyhe/AAAnKtpH8wirj0sazkdZCbEMa?dl=0
2 |
--------------------------------------------------------------------------------
/database/vid_data/ama-samba.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/uox4amcyd9g2gm9/AAA8XECVaSjqpgEgwTzIxc5da?dl=0
2 |
--------------------------------------------------------------------------------
/database/vid_data/car-turnaround-2.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/l7klay8bg54ryb8/AACeEmZq4aj6RXYUdY-UaZsua
2 |
--------------------------------------------------------------------------------
/database/vid_data/car-turnaround.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/2saroz7jrnp1cy6/AACGHva9pJAIwQ6k8qgMs5Nma
2 |
--------------------------------------------------------------------------------
/database/vid_data/cat-85.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/38f29ro8aq85enk/AAA5aSgBi4otuPrEiZRm1Ih5a
2 |
--------------------------------------------------------------------------------
/database/vid_data/cat-pikachu-0.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/ls19bz5uo8juzoa/AAB0x4GUeH5PO97sB8Nak9eIa
--------------------------------------------------------------------------------
/database/vid_data/cat-pikachu.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/bcm9150d3sy68ve/AADbQVnHFbBTvsfJLoa9AM9Ba
--------------------------------------------------------------------------------
/database/vid_data/dog-98.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/85567m06uxoni42/AAASa1OnsXM2u8cxEiQSSF_Ia
2 |
--------------------------------------------------------------------------------
/database/vid_data/dog-robolounge.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/66a2f1cfudj6ep3/AAAbJE0mzMMQdLruPnO16r8la
2 |
--------------------------------------------------------------------------------
/database/vid_data/human-48.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/kv4xcntzcwfnmcv/AABqMAvjoTJw4U_8puObKBD9a
2 |
--------------------------------------------------------------------------------
/database/vid_data/human-cap.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/rl351jmtw9v5107/AACYWpO9M453NJr8ACViIeLfa
2 |
--------------------------------------------------------------------------------
/database/vid_data/room.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/hkojyikow9jcd0g/AACA5-U75SQycUYbbx8bDdlUa
2 |
--------------------------------------------------------------------------------
/database/vid_data/shiba-haru.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/u3j1ps2bcyubvs1/AACOrE2DiK-O2l74Q5Y4SlNQa
2 |
--------------------------------------------------------------------------------
/database/vid_data/squirrel-baseball.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/wur870xkv01hv32/AADsB0zeCGWyUy4czQX5jCMCa
2 |
--------------------------------------------------------------------------------
/database/vid_data/squirrel.txt:
--------------------------------------------------------------------------------
1 | https://www.dropbox.com/sh/1ktr3qnqwdysyvi/AAAhIRpzWB58KmCJvXu4agd_a
2 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /source/api_docs/lab4d*
3 | /source/_static/meshes/*.glb
4 | /source/_static/media/*
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | To develop locally, start Live Server and forward the port to your local browser
2 |
3 | To generate the necessary files:
4 | ```
5 | sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/
6 | python source/obj2glb.py
7 | ```
8 |
9 | To rebuild the webpage:
10 | ```make clean; make html; mv build/html build/lab4d```
--------------------------------------------------------------------------------
/docs/env_min.yml:
--------------------------------------------------------------------------------
1 | name: lab4d
2 | channels:
3 |   - pytorch
4 |   - conda-forge
5 | dependencies:
6 |   - pip
7 |   - ninja
8 |   - pytorch
9 |   - torchvision
10 |   - cpuonly
11 |   - matplotlib
12 |   - absl-py
13 |   - tensorboard
14 |   - trimesh
15 |   - scikit-image
16 |   - opencv
17 |   - einops
18 |   - scikit-learn
19 |   - imageio=2.14.1
20 |   - imageio-ffmpeg
21 |   - pip:
22 |       - pysdf
23 |       - sphinx==6.2.1
24 |       - sphinx-copybutton
25 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/_static/images/camera_annot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/camera_annot.png
--------------------------------------------------------------------------------
/docs/source/_static/images/visflo-00081.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/visflo-00081.jpg
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround-2-anno.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-anno.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround-2-proxy.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-proxy.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround-2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround-2_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_bev-120.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_bev.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_ref.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_turntable-120.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/car-turnaround_turntable.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-85.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0_ref.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-7.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-7_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-8_ref.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/comp_elev.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/comp_elev.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/dog-98.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/dog-robolounge_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-robolounge_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/finch.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/finch_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/finch_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-0_ref-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-0_ref.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-0_turntable-120.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48-reanimate-8.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-48.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-cap-3-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-cap-3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/human-cap-3_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/penguin-1-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-1-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/penguin-2_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-2_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/penguin.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/shiba-haru-6.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-6.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/shiba-haru-7-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/shiba-haru-7.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/shiba-haru-7_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7_collage.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/squirrel-xyz.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel-xyz.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/squirrel.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel.mp4
--------------------------------------------------------------------------------
/docs/source/_static/media_resized/squirrel_collage.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel_collage.mp4
--------------------------------------------------------------------------------
/docs/source/api_docs/index.rst:
--------------------------------------------------------------------------------
1 | .. Lab4D documentation master file, created by
2 |    sphinx-quickstart on Fri Jun 2 20:54:08 2023.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 |
6 | Welcome to Lab4D's documentation!
7 | =================================
8 |
9 | .. toctree::
10 |    :maxdepth: 2
11 |
12 |    lab4d.dataloader
13 |    lab4d.engine
14 |    lab4d.nnutils
15 |    lab4d.utils
16 |
17 | .. Indices and tables
18 | .. ==================
19 |
20 | .. * :ref:`genindex`
21 | .. * :ref:`modindex`
22 | .. * :ref:`search`
23 |
--------------------------------------------------------------------------------
/docs/source/api_docs/modules.rst:
--------------------------------------------------------------------------------
1 | lab4d
2 | =====
3 |
4 | .. toctree::
5 |    :maxdepth: 4
6 |
7 |    lab4d
8 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 |
6 | # -- Project information -----------------------------------------------------
7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
8 |
9 | project = "Lab4D"
10 | copyright = "2023, Gengshan Yang, Jeff Tan, Alex Lyons, Neehar Peri, Deva Ramanan, Carnegie Mellon University"
11 | release = "0.0.0"
12 |
13 | # -- General configuration ---------------------------------------------------
14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
15 |
16 | import sys, os
17 |
18 | # Path to lab4d
19 | sys.path.insert(
20 |     0,
21 |     "%s/../../" % os.path.join(os.path.dirname(__file__)),
22 | )
23 |
24 | # Allow auto-generated docs from Google format docstrings
25 | extensions = [
26 |     "sphinx.ext.autodoc",
27 |     "sphinx.ext.napoleon",
28 |     "sphinx.ext.intersphinx",
29 |     "sphinx_copybutton",
30 | ]
31 |
32 | # other packages
33 | intersphinx_mapping = {
34 |     "python": ("https://docs.python.org/3", None),
35 |     "pytorch": ("https://pytorch.org/docs/stable/", None),
36 | }
37 |
38 | # Allow documentation of multiple return types
39 | napoleon_custom_sections = [("Returns", "params_style")]
40 |
41 | templates_path = ["_templates"]
42 | exclude_patterns = []
43 |
44 | # Mocking the imports of modules that requires cuda
45 | autodoc_mock_imports = ["_quaternion"]
46 |
47 | # -- Options for HTML output -------------------------------------------------
48 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
49 |
50 | html_theme = "pytorch_sphinx_theme"
51 | html_theme_path = ["../pytorch_sphinx_theme"]
52 | html_static_path = ["_static"]
53 |
--------------------------------------------------------------------------------
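Since `conf.py` above enables `sphinx.ext.napoleon` and registers a params-style `Returns` section, docstrings across the codebase are expected to follow the Google format. A hypothetical example (the function is illustrative, not taken from `lab4d`) of a docstring these settings would parse, including multiple documented return values:

```
import numpy as np


def split_quaternion(q):
    """Split a quaternion into its scalar and vector parts.

    Args:
        q (np.ndarray): Quaternion in (w, x, y, z) order, shape (4,)

    Returns:
        w (float): Scalar part
        xyz (np.ndarray): Vector part, shape (3,)
    """
    return q[0], q[1:]
```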
/docs/source/get_started/index.rst:
--------------------------------------------------------------------------------
1 | Get Started
2 | ===================
3 |
4 | Requirements
5 | -------------------------
6 |
7 | - **Linux** machine with at least 1 GPU (we tested on 3090s)
8 | - **Conda**
9 |
10 |   - Follow `this link `_ to install conda.
11 |
12 |   - Recommended: use mamba for package management (more efficient than conda). Install mamba with::
13 |
14 |       conda install -c conda-forge mamba -y
15 |
16 | - For developers: use `VS Code `_ with Black Formatter.
17 |
18 | Set up the environment
19 | -------------------------
20 |
21 | Clone the repository and create a conda environment with the required packages::
22 |
23 |     git clone git@github.com:lab4d-org/lab4d.git --recursive
24 |
25 |     cd lab4d
26 |
27 |     mamba env create -f environment.yml
28 |
29 |     conda activate lab4d
30 |
31 |     bash scripts/install-deps.sh
32 |
33 |
34 | Running the Tutorial Code
35 | ---------------------------------------------
36 | See the `Tutorials page `_.
37 |
38 |
39 | .. .. Lab4D documentation master file, created by
40 | .. sphinx-quickstart on Fri Jun 2 20:54:08 2023.
41 | .. You can adapt this file completely to your liking, but it should at least
42 | .. contain the root `toctree` directive.
43 |
44 | .. Welcome to Lab4D's DOCUMENTATION!
45 | .. =================================
46 |
47 | .. .. toctree::
48 | .. :maxdepth: 2
49 |
50 | .. get_started
51 |
52 | .. .. Indices and tables
53 | .. .. ==================
54 |
55 | .. .. * :ref:`genindex`
56 | .. .. * :ref:`modindex`
57 | .. .. * :ref:`search`
58 |
--------------------------------------------------------------------------------
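After setting up the environment above, a quick sanity check (not part of the repo) that PyTorch can see the GPU the requirements call for; the interpretation of the outputs is an assumption about a correctly configured machine:

```
import torch

# The tutorials assume a Linux machine with at least one CUDA device (the docs mention 3090s).
print(torch.__version__)
print(torch.cuda.is_available())   # expected True after `bash scripts/install-deps.sh`
print(torch.cuda.device_count())   # expected >= 1
```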
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Lab4D documentation master file, created by
2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Lab4D's documentation!
7 | =================================
8 |
9 | **Lab4D** is a framework for 4D reconstruction from monocular videos.
10 |
11 | Features
12 | -------------------------------
13 | - Representation
14 |
15 | - neural implicit representation
16 |
17 | - deformation fields (neural fields, control-points, skeleton)
18 |
19 | - compositional scene
20 |
21 | - category-level models
22 |
23 | - Interface for priors
24 |
25 | - pixelwise priors: depth, flow, DINOv2 features
26 |
27 | - segmentation: track-anything, video instance segmentation
28 |
29 | - camera viewpoint: viewpoint network, manual annotation
30 |
31 | - Efficiency
32 |
33 | - multi-gpu training
34 |
35 | - dual-quaternion ops
36 |
37 | .. note::
38 |
39 |    This is an alpha release, and the APIs are subject to change as we continuously improve and refine the framework.
40 |    We encourage users to provide feedback and report bugs via `github issues `_.
41 |    Thank you for your support.
42 |
--------------------------------------------------------------------------------
/docs/source/obj2glb.py:
--------------------------------------------------------------------------------
1 | import trimesh
2 | import numpy as np
3 | import glob
4 | import os
5 |
6 | rootdir = os.path.dirname(__file__)
7 |
8 | for path in glob.glob("%s/_static/meshes/*.obj" % rootdir):
9 | print(path)
10 | m = trimesh.load(path, process=False)
11 |     # convert from OpenCV to OpenGL camera coordinates (negate y and z)
12 | m.vertices = np.stack(
13 | [m.vertices[:, 0], -m.vertices[:, 1], -m.vertices[:, 2]], axis=1
14 | )
15 | m.export(path.replace(".obj", ".glb"))
16 |
--------------------------------------------------------------------------------
/docs/source/qa.rst:
--------------------------------------------------------------------------------
1 | Q&A
2 | ===========================
3 |
4 | Installation
5 | ---------------------------
6 | - Conda/mamba is not able to resolve conflicts when installing packages.
7 |
8 |   - Possible cause: The base conda environment is not clean. See the discussion `in this thread `_.
9 |
10 |   - Fix: Remove the packages in the base environment that cause the conflict.
11 |
12 | Data pre-processing
13 | ---------------------------
14 | - My Gradio app gets stuck at the loading screen.
15 |
16 |   - Potential fix: kill the running VS Code processes and re-run the preprocessing code.
17 |
18 | Model training
19 | ---------------------------
20 |
21 | - How to change hyperparameters when using more videos (or video frames)?
22 |
23 | - You want to increase `pixels_per_image`, `imgs_per_gpu` and use more gpus.
24 | The number of sampled rays / pixels per minibatch is computed as the number of gpus x imgs_per_gpu x pixels_per_image.
25 | Also see the note `here `__.
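
     For example, with the defaults `imgs_per_gpu=128` and `pixels_per_image=16` from `lab4d/config.py`, training on 2 GPUs samples::

        ngpu, imgs_per_gpu, pixels_per_image = 2, 128, 16
        rays_per_batch = ngpu * imgs_per_gpu * pixels_per_image  # 4096 rays per minibatch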
26 |
27 | - Training on >50 videos might cause the following os error::
28 |
29 | [Errno 24] Too many open files
30 |
31 | - To check the current file limit, run::
32 |
33 | ulimit -S -n
34 |
35 |      To increase the open file limit to 4096, run::
36 |
37 |         ulimit -n 4096
38 |
39 | - Multi-GPU training hangs but single-GPU training works fine.
40 |
41 |    - Run the training script with `NCCL_P2P_DISABLE=1 bash scripts/train.sh ...` to disable direct GPU-to-GPU (P2P) communication. See the discussion `here `__.
42 |
--------------------------------------------------------------------------------
/docs/source/resize_vids.py:
--------------------------------------------------------------------------------
1 | # python source/resize_vids.py
2 | import os
3 | import numpy as np
4 | import imageio
5 | from PIL import Image
6 |
7 | src_dir = "source/_static/media"
8 | dst_dir = "source/_static/media_resized/"
9 | max_dim = 640 * 640
10 | video_exts = [".mp4", ".avi", ".mov", ".flv", ".mkv", ".wmv"]
11 |
12 | # check for destination directory and create if it doesn't exist
13 | if not os.path.exists(dst_dir):
14 | os.makedirs(dst_dir)
15 |
16 | # iterate over video files in source directory
17 | for filename in os.listdir(src_dir):
18 | # check if file is a video, ignoring the case of the extension
19 | if any(filename.lower().endswith(ext) for ext in video_exts):
20 | # add other conditions if there are other video formats
21 | src_filepath = os.path.join(src_dir, filename)
22 | dst_filepath = os.path.splitext(filename)[0] + ".mp4"
23 | dst_filepath = os.path.join(dst_dir, dst_filepath)
24 |
25 | reader = imageio.get_reader(src_filepath)
26 | fps = reader.get_meta_data()["fps"]
27 |
28 | # obtain video dimensions
29 | first_frame = reader.get_data(0)
30 | orig_height, orig_width = first_frame.shape[:2]
31 |
32 | # check if resolution is greater than 640x640
33 | if orig_height * orig_width > max_dim:
34 | print("Resizing video: " + filename)
35 | # resize maintaining aspect ratio
36 | ratio = np.sqrt(max_dim / (orig_height * orig_width))
37 | new_width = int(orig_width * ratio)
38 | new_height = int(orig_height * ratio)
39 |
40 | writer = imageio.get_writer(dst_filepath, fps=fps)
41 |
42 | # iterate over frames in the video
43 | for i, frame in enumerate(reader):
44 | frame = Image.fromarray(frame)
45 |                 frame = frame.resize((new_width, new_height), Image.LANCZOS)
46 | writer.append_data(np.array(frame))
47 |
48 | writer.close()
49 | else:
50 | # copy video to destination directory
51 | print("Copying video: " + filename)
52 | os.system("cp " + src_filepath + " " + dst_filepath)
53 |
54 | print("Video resizing is complete!")
55 |
--------------------------------------------------------------------------------
/docs/source/tutorials/index.rst:
--------------------------------------------------------------------------------
1 | .. Lab4D documentation master file, created by
2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Tutorials
7 | =================================
8 |
9 | Overview
10 | ---------------------------------
11 | Inferring 4D representations from 2D observations is challenging due to its under-constrained nature.
12 | Leveraging recent advances in differentiable rendering, visual correspondence, and segmentation, we built an optimization framework that
13 | reconstructs dense 4D structures via test-time optimization, by minimizing the difference between the rendered 2D images and the input observations.
14 |
15 | The tutorials introduce a complete workflow of Lab4D. We'll use the methods and datasets from the following papers:
16 |
17 | - `BANMo: Building Animatable 3D Neural Models from Many Casual Videos `_, CVPR 2022.
18 | - `RAC: Reconstructing Animatable Categories from Videos `_, CVPR 2023.
19 | - `Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis `_, ICCV 2023.
20 |
21 | `The tutorials assume a basic familiarity with Python and differentiable rendering concepts.`
22 |
23 | Each tutorial can be executed in a few ways:
24 |
25 | - **Custom videos**: This option allows you to train a model on your own videos.
26 | - **Preprocessed data**: This option skips the preprocessing step and allows you to train models on the `preprocessed data `_ we provide.
27 | - **Render-only**: This option skips model training and allows you to render results with the `pre-trained model weights `_ we provide.
28 |
29 |
30 | Content
31 | ---------------------------------
32 | .. toctree::
33 | :maxdepth: 1
34 |
35 | arbitrary_video
36 | single_video_cat
37 | multi_video_cat
38 | category_model
39 | preprocessing
40 |
41 | .. Indices and tables
42 | .. ==================
43 |
44 | .. * :ref:`genindex`
45 | .. * :ref:`modindex`
46 | .. * :ref:`search`
47 |
--------------------------------------------------------------------------------
/docs/source/tutorials/preprocessing.rst:
--------------------------------------------------------------------------------
1 | 5. Pre-process custom videos
2 | ========================================
3 |
4 | In this tutorial, we show how to preprocess custom videos that can later be used for training. We provide some
5 | `raw videos `_ for you to try out.
6 | The download links are provided as `database/vid_data/$seqname`, where `$seqname` is the name of the sequence.
7 |
8 | Taking `cat-pikachu-0` from the `second tutorial `_ as an example,
9 | run the following to download and process the data::
10 |
11 | # Args: sequence name, text prompt (segmentation), category from {human, quad, other} (camera viewpoint), gpu id
12 | python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
13 |
14 | .. note::
15 | To preprocess other videos, create a folder named `database/raw/$seqname`, move the videos into it, and run the above command.
16 |
17 | `Next, we will get into the details of processing.`
18 |
19 | Frame filtering
20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21 | By default, we first remove near-static frames (i.e., frames without motion or with small motion) since they do not provide useful extra signal for reconstruction.
22 | To do so, we run optical flow over consecutive frames and skip a frame if the median flow magnitude is smaller than a threshold.
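
A minimal sketch of this rule (the function name and the threshold value are illustrative; the actual logic lives in the preprocessing scripts)::

    import numpy as np

    def keep_frame(flow_uv, threshold=2.0):
        """Keep a frame if its median optical-flow magnitude exceeds the threshold.

        flow_uv: (H, W, 2) optical flow to the previously kept frame.
        """
        magnitude = np.linalg.norm(flow_uv, axis=-1)
        return np.median(magnitude) >= threshold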
23 |
24 | .. note::
25 |     There is a flag in `scripts/run_preprocess.py` that controls frame filtering; set it to False to keep all frames.
26 |
27 | Segmentation
28 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29 |
30 | We provide a web GUI and a command line interface for object segmentation.
31 |
32 | **Interactive segmentation**: `Track-Anything `_ will be used when the text prompt is "other", e.g.::
33 |
34 | python scripts/run_preprocess.py cat-pikachu-0 other quad "0"
35 |
36 | It creates a web interface and asks the user to specify point prompts on the object of interest.
37 |
38 |
39 | **Automatic segmentation**: `Grounding-DINO `_ will be used to determine which object to track
40 | in the first frame, given a valid text prompt, e.g.::
41 |
42 | python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
43 |
44 |
45 | .. note::
46 |
47 |     There is a flag in `scripts/run_preprocess.py` that switches the segmentation method.
48 |
49 |
50 | Object-to-camera transformations
51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
52 |
53 | For humans and quadruped animals, we use a viewpoint network (presented in BANMo) to estimate the camera viewpoint / rotation with respect to a canonical 3D coordinate frame.
54 |
55 | For other categories, the user will be asked to annotate camera viewpoints (by aligning the orientation of a reference 3D model to the input image) for a few frames, as shown below.
56 |
57 | .. raw:: html
58 |
59 |
60 |
61 |
62 |
63 | .. note::
64 |
65 |    To align the 3D model with the provided image, use the sidebar to specify the camera's roll, elevation, and azimuth angles. After adjusting each frame, click 'save'. Once you have completed the adjustments for all the videos, click 'exit'.
66 |    We suggest making an annotation every time the object turns 90 degrees, e.g., when it changes from facing the camera to facing left.
67 |
68 |    In the `scripts/run_preprocess.py` file, there is a flag that allows you to change the method used for camera estimation.
69 |
70 | After getting the sparse annotations, we run camera registration, which propagates the rotation annotations using optical flow and monocular depth.
71 | Camera translations are approximated from the 2D object center and size (from segmentation), assuming an orthographic camera model.
72 |
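Roughly, the object's apparent size determines its depth, and the offset of its 2D center from the principal point determines the in-plane translation. A minimal sketch of this approximation (the function name, arguments, and unit canonical size below are illustrative, not the repository's API)::

    import numpy as np

    def approx_translation(center_xy, obj_size_px, focal, ppoint, obj_size_3d=1.0):
        """Approximate the object-to-camera translation from the 2D center and size."""
        tz = focal * obj_size_3d / obj_size_px        # depth from apparent size
        tx = (center_xy[0] - ppoint[0]) * tz / focal  # horizontal offset
        ty = (center_xy[1] - ppoint[1]) * tz / focal  # vertical offset
        return np.array([tx, ty, tz])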
73 |
74 | Parallelizing the pre-processing
75 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
76 |
77 | Preprocessing 10 videos takes about 90 minutes on a single device. To speed up the pre-processing,
78 | we can parallelize tasks over multiple gpus with the following::
79 |
80 | # Args: sequence name, text prompt for segmentation, category from {human, quad, other} for camera viewpoint, gpu id
81 | python scripts/run_preprocess.py cat-pikachu animal quad "0,1,2,3"
82 |
83 |
84 | Visit other `tutorials `_.
--------------------------------------------------------------------------------
/docs/template/module.rst_t:
--------------------------------------------------------------------------------
1 | {%- if show_headings %}
2 | {{- [basename, "module"] | join(' ') | e | heading }}
3 |
4 | {% endif -%}
5 | .. automodule:: {{ qualname }}
6 | {%- for option in automodule_options %}
7 | :{{ option }}:
8 | {%- endfor %}
9 |
10 |
--------------------------------------------------------------------------------
/docs/template/package.rst_t:
--------------------------------------------------------------------------------
1 | {%- macro automodule(modname, options) -%}
2 | .. automodule:: {{ modname }}
3 | {%- for option in options %}
4 | :{{ option }}:
5 | {%- endfor %}
6 | {%- endmacro %}
7 |
8 | {%- macro toctree(docnames) -%}
9 | .. toctree::
10 | :maxdepth: {{ maxdepth }}
11 | {% for docname in docnames %}
12 | {{ docname }}
13 | {%- endfor %}
14 | {%- endmacro %}
15 |
16 | {%- if is_namespace %}
17 | {{- [pkgname, "namespace"] | join(" ") | e | heading }}
18 | {% else %}
19 | {{- [pkgname, "package"] | join(" ") | e | heading }}
20 | {% endif %}
21 |
22 | {%- if is_namespace %}
23 | .. py:module:: {{ pkgname }}
24 | {% endif %}
25 |
26 | {%- if modulefirst and not is_namespace %}
27 | {{ automodule(pkgname, automodule_options) }}
28 | {% endif %}
29 |
30 | {%- if subpackages %}
31 | Subpackages
32 | -----------
33 |
34 | {{ toctree(subpackages) }}
35 | {% endif %}
36 |
37 | {%- if submodules %}
38 | {% if separatemodules %}
39 | {{ toctree(submodules) }}
40 | {% else %}
41 | {%- for submodule in submodules %}
42 | {% if show_headings %}
43 | {{- submodule | e | heading(2) }}
44 | {% endif %}
45 | {{ automodule(submodule, automodule_options) }}
46 | {% endfor %}
47 | {%- endif %}
48 | {%- endif %}
--------------------------------------------------------------------------------
/docs/template/toc.rst_t:
--------------------------------------------------------------------------------
1 | {{ header | heading }}
2 |
3 | .. toctree::
4 | :maxdepth: {{ maxdepth }}
5 | {% for docname in docnames %}
6 | {{ docname }}
7 | {%- endfor %}
8 |
9 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: lab4d
2 | channels:
3 | - pytorch
4 | - nvidia
5 | - conda-forge
6 | dependencies:
7 | - python=3.9
8 | - setuptools=66.0.0
9 | - pip
10 | - pytorch==2.0.0=py3.9_cuda11.7_cudnn8.5.0_0
11 | - torchvision
12 | - cudatoolkit-dev=11.7
13 | - gcc_linux-64=10
14 | - gxx_linux-64=10
15 | - matplotlib
16 | - ninja
17 | - absl-py
18 | - tensorboard
19 | - trimesh
20 | - scikit-image
21 | - opencv
22 | - einops
23 | - numba
24 | - gdown
25 | - scikit-learn
26 | - psutil
27 | - av
28 | - plotly
29 | - imageio
30 | - imageio-ffmpeg
31 | - tqdm
32 | - pip:
33 | - pysdf
34 | - gradio==3.49.0
35 | - timm==0.6.7
36 | - detectron2 @ git+https://github.com/facebookresearch/detectron2.git@e9f7e2b
37 | - segment_anything @ git+https://github.com/facebookresearch/segment-anything.git
38 | - groundingdino @ git+https://github.com/IDEA-Research/GroundingDINO.git
39 | - openmim
40 | - pyrender
41 |
--------------------------------------------------------------------------------
/lab4d/__init__.py:
--------------------------------------------------------------------------------
1 | # Decorate all modules with @record_function and @record_class
2 | import lab4d.dataloader.data_utils
3 | import lab4d.dataloader.vidloader
4 | import lab4d.engine.model
5 | import lab4d.engine.train_utils
6 | import lab4d.engine.trainer
7 | import lab4d.nnutils.appearance
8 | import lab4d.nnutils.base
9 | import lab4d.nnutils.deformable
10 | import lab4d.nnutils.embedding
11 | import lab4d.nnutils.feature
12 | import lab4d.nnutils.intrinsics
13 | import lab4d.nnutils.multifields
14 | import lab4d.nnutils.nerf
15 | import lab4d.nnutils.pose
16 | import lab4d.nnutils.skinning
17 | import lab4d.nnutils.time
18 | import lab4d.nnutils.visibility
19 | import lab4d.nnutils.warping
20 | import lab4d.utils.cam_utils
21 | import lab4d.utils.camera_utils
22 | import lab4d.utils.geom_utils
23 | import lab4d.utils.io
24 | import lab4d.utils.loss_utils
25 | import lab4d.utils.numpy_utils
26 | import lab4d.utils.quat_transform
27 | import lab4d.utils.render_utils
28 | import lab4d.utils.skel_utils
29 | import lab4d.utils.torch_utils
30 | import lab4d.utils.transforms
31 | import lab4d.utils.vis_utils
32 | from lab4d.utils.profile_utils import decorate_module
33 |
34 | decorate_module(lab4d.dataloader.data_utils)
35 | decorate_module(lab4d.dataloader.vidloader)
36 | decorate_module(lab4d.engine.model)
37 | decorate_module(lab4d.engine.trainer)
38 | decorate_module(lab4d.engine.train_utils)
39 | decorate_module(lab4d.nnutils.appearance)
40 | decorate_module(lab4d.nnutils.base)
41 | decorate_module(lab4d.nnutils.deformable)
42 | decorate_module(lab4d.nnutils.embedding)
43 | decorate_module(lab4d.nnutils.feature)
44 | decorate_module(lab4d.nnutils.intrinsics)
45 | decorate_module(lab4d.nnutils.multifields)
46 | decorate_module(lab4d.nnutils.nerf)
47 | decorate_module(lab4d.nnutils.pose)
48 | decorate_module(lab4d.nnutils.skinning)
49 | decorate_module(lab4d.nnutils.time)
50 | decorate_module(lab4d.nnutils.visibility)
51 | decorate_module(lab4d.nnutils.warping)
52 | decorate_module(lab4d.utils.camera_utils)
53 | decorate_module(lab4d.utils.cam_utils)
54 | decorate_module(lab4d.utils.geom_utils)
55 | decorate_module(lab4d.utils.io)
56 | decorate_module(lab4d.utils.loss_utils)
57 | decorate_module(lab4d.utils.numpy_utils)
58 | decorate_module(lab4d.utils.quat_transform)
59 | decorate_module(lab4d.utils.render_utils)
60 | decorate_module(lab4d.utils.skel_utils)
61 | decorate_module(lab4d.utils.torch_utils)
62 | decorate_module(lab4d.utils.transforms)
63 | decorate_module(lab4d.utils.vis_utils)
64 |
--------------------------------------------------------------------------------
/lab4d/config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import os
3 |
4 | from absl import flags
5 |
6 | opts = flags.FLAGS
7 |
8 |
9 | class TrainModelConfig:
10 | # weights of reconstruction terms
11 | flags.DEFINE_float("mask_wt", 0.1, "weight for silhouette loss")
12 | flags.DEFINE_float("rgb_wt", 0.1, "weight for color loss")
13 | flags.DEFINE_float("depth_wt", 1e-4, "weight for depth loss")
14 | flags.DEFINE_float("flow_wt", 0.5, "weight for flow loss")
15 | flags.DEFINE_float("vis_wt", 1e-2, "weight for visibility loss")
16 | flags.DEFINE_float("feature_wt", 1e-2, "weight for feature reconstruction loss")
17 | flags.DEFINE_float("feat_reproj_wt", 5e-2, "weight for feature reprojection loss")
18 |
19 | # weights of regularization terms
20 | flags.DEFINE_float(
21 | "reg_visibility_wt", 1e-4, "weight for visibility regularization"
22 | )
23 | flags.DEFINE_float("reg_eikonal_wt", 1e-3, "weight for eikonal regularization")
24 | flags.DEFINE_float(
25 | "reg_deform_cyc_wt", 0.01, "weight for deform cyc regularization"
26 | )
27 | flags.DEFINE_float("reg_delta_skin_wt", 5e-3, "weight for delta skinning reg")
28 |     flags.DEFINE_float("reg_skin_entropy_wt", 5e-4, "weight for skinning entropy reg")
29 | flags.DEFINE_float(
30 | "reg_gauss_skin_wt", 1e-3, "weight for gauss skinning consistency"
31 | )
32 | flags.DEFINE_float("reg_cam_prior_wt", 0.1, "weight for camera regularization")
33 | flags.DEFINE_float("reg_skel_prior_wt", 0.1, "weight for skeleton regularization")
34 | flags.DEFINE_float(
35 | "reg_gauss_mask_wt", 0.01, "weight for gauss mask regularization"
36 | )
37 | flags.DEFINE_float("reg_soft_deform_wt", 100.0, "weight for soft deformation reg")
38 |
39 | # model
40 | flags.DEFINE_string("field_type", "fg", "{bg, fg, comp}")
41 | flags.DEFINE_string(
42 | "fg_motion", "rigid", "{rigid, dense, bob, skel-human, skel-quad}"
43 | )
44 | flags.DEFINE_bool("single_inst", True, "assume the same morphology over objs")
45 |
46 |
47 | class TrainOptConfig:
48 | # io-related
49 | flags.DEFINE_string("seqname", "cat", "name of the sequence")
50 | flags.DEFINE_string("logname", "tmp", "name of the saved log")
51 | flags.DEFINE_string(
52 | "data_prefix", "crop", "prefix of the data entries, {crop, full}"
53 | )
54 | flags.DEFINE_integer("train_res", 256, "size of training images")
55 | flags.DEFINE_string("logroot", "logdir/", "root directory for log files")
56 |     flags.DEFINE_string("load_suffix", "", "suffix of params, {latest, 0, 10, ...}")
57 | flags.DEFINE_string("feature_type", "dinov2", "{dinov2, cse}")
58 | flags.DEFINE_string("load_path", "", "path to load pretrained model")
59 |
60 | # accuracy-related
61 | flags.DEFINE_float("learning_rate", 5e-4, "learning rate")
62 | flags.DEFINE_integer("num_rounds", 20, "number of rounds to train")
63 | flags.DEFINE_integer("iters_per_round", 200, "number of iterations per round")
64 |     flags.DEFINE_integer("imgs_per_gpu", 128, "image samples per iter, per gpu")
65 | flags.DEFINE_integer("pixels_per_image", 16, "pixel samples per image")
66 | # flags.DEFINE_integer("imgs_per_gpu", 1, "size of minibatches per iter")
67 | # flags.DEFINE_integer("pixels_per_image", 4096, "number of pixel samples per image")
68 | flags.DEFINE_boolean(
69 | "freeze_bone_len", False, "do not change bone length of skeleton"
70 | )
71 | flags.DEFINE_boolean(
72 | "reset_steps",
73 | True,
74 | "reset steps of loss scheduling, set to False if resuming training",
75 | )
76 |
77 | # efficiency-related
78 | flags.DEFINE_integer("ngpu", 1, "number of gpus to use")
79 | flags.DEFINE_integer("num_workers", 2, "Number of workers for dataloading")
80 | flags.DEFINE_integer("eval_res", 64, "size used for eval visualizations")
81 | flags.DEFINE_integer("save_freq", 10, "params saving frequency")
82 | flags.DEFINE_boolean("profile", False, "profile the training loop")
83 |
84 |
85 | def get_config():
86 | return opts.flag_values_dict()
87 |
88 |
89 | def save_config():
90 | save_dir = os.path.join(opts.logroot, "%s-%s" % (opts.seqname, opts.logname))
91 | os.makedirs(save_dir, exist_ok=True)
92 | opts_path = os.path.join(save_dir, "opts.log")
93 | if os.path.exists(opts_path):
94 | os.remove(opts_path)
95 | opts.append_flags_into_file(opts_path)
96 |
--------------------------------------------------------------------------------
/lab4d/config_omega.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import os
3 | from omegaconf import DictConfig, OmegaConf
4 | # Define the hierarchical configuration using a dictionary
5 | config = DictConfig(
6 | {
7 | "train": {
8 | "weights": {
9 | "recon": {
10 | "mask_wt": 0.1, # weight for silhouette loss
11 | "rgb_wt": 0.1, # weight for color loss
12 | "depth_wt": 0.01, # weight for depth loss
13 | "flow_wt": 0.5, # weight for flow loss
14 | "vis_wt": 0.01, # weight for visibility loss
15 | "feature_wt": 0.01, # weight for feature reconstruction loss
16 | "feat_reproj_wt": 0.05, # weight for feature reprojection loss
17 | },
18 | "reg": {
19 | "visibility_wt": 1e-3, # weight for visibility regularization
20 | "eikonal_wt": 1e-5, # weight for eikonal regularization
21 | "deform_cyc_wt": 0.01, # weight for deform cyc regularization
22 | "gauss_skin_wt": 1, # weight for gauss skinning consistency
23 | },
24 | },
25 | "model": {
26 | "field_type": "bg", # {bg, fg, comp}
27 | "fg_motion": "rigid", # {rigid, dense, bob, skel}
28 | "single_inst": True, # assume the same morphology over objs
29 | },
30 | "io": {
31 | "seqname": "cat", # name of the sequence
32 | "logname": "tmp", # name of the saved log
33 | "data_prefix": "full", # prefix of the data entries
34 | "train_res": 256, # size of training images
35 | "logroot": "logdir/", # root directory for log files
36 |                 "load_suffix": "",  # suffix of params, {latest, 0, 10, ...}
37 | "save_freq": 10, # params saving frequency
38 | },
39 | "optim": {
40 | "learning_rate": 5e-4, # learning rate
41 |                 "num_rounds": 20,  # number of rounds to train
42 | "iters_per_round": 200, # number of iterations per round
43 | "imgs_per_gpu": 128, # images samples per iter, per gpu
44 | "pixels_per_image": 16, # pixel samples per image
45 | "ngpu": 1, # number of gpus to use
46 | "num_workers": 2, # number of workers for dataloading
47 | },
48 | "eval_res": 64, # size used for eval visualizations
49 | "profile": False, # profile the training loop
50 | },
51 | }
52 | )
53 |
54 |
55 | def get_config():
56 |     return OmegaConf.to_container(config)
57 |
58 |
59 | def save_config():
60 |     save_dir = os.path.join(config.train.io.logroot, config.train.io.logname)
61 |     os.makedirs(save_dir, exist_ok=True)
62 |     opts_path = os.path.join(save_dir, "opts.log")
63 |     if os.path.exists(opts_path):
64 |         os.remove(opts_path)
65 |     OmegaConf.save(config=config, f=opts_path)
66 |
67 |
68 | # # Convert the configuration to a dictionary
69 | # config_dict = OmegaConf.to_container(config)
70 |
71 | # # Convert the dictionary back to a configuration
72 | # config2 = OmegaConf.create(config_dict)
73 |
--------------------------------------------------------------------------------
/lab4d/dataloader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/dataloader/__init__.py
--------------------------------------------------------------------------------
/lab4d/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/engine/__init__.py
--------------------------------------------------------------------------------
/lab4d/engine/train_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import os
3 |
4 | import torch
5 |
6 |
7 | def get_local_rank():
8 | try:
9 | return int(os.environ["LOCAL_RANK"])
10 |     except KeyError:
11 | print("LOCAL_RANK not found, set to 0")
12 | return 0
13 |
14 |
15 | class DataParallelPassthrough(torch.nn.parallel.DistributedDataParallel):
16 | """For multi-GPU access, forward attributes to the inner module."""
17 |
18 | def __getattr__(self, name):
19 | try:
20 | return super().__getattr__(name)
21 | except AttributeError:
22 | return getattr(self.module, name)
23 |
24 | def __delattr__(self, name):
25 | try:
26 | return super().__delattr__(name)
27 | except AttributeError:
28 | return delattr(self.module, name)
29 |
--------------------------------------------------------------------------------
/lab4d/nnutils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/nnutils/__init__.py
--------------------------------------------------------------------------------
/lab4d/nnutils/appearance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import torch
3 | import torch.nn as nn
4 |
5 | from lab4d.nnutils.time import TimeMLP
6 |
7 |
8 | class AppearanceEmbedding(TimeMLP):
9 | """Encode global appearance code over time with an MLP
10 |
11 | Args:
12 | frame_info (Dict): Metadata about the frames in a dataset
13 | appr_channels (int): Number of channels in appearance codes
14 | D (int): Number of linear layers
15 | W (int): Number of hidden units in each MLP layer
16 | num_freq_t (int): Number of frequencies in the time embedding
17 | skips (List(int)): List of layers to add skip connections at
18 | activation (Function): Activation function to use (e.g. nn.ReLU())
19 | """
20 |
21 | def __init__(
22 | self,
23 | frame_info,
24 | appr_channels,
25 | D=2,
26 | W=64,
27 | num_freq_t=6,
28 | skips=[],
29 | activation=nn.ReLU(True),
30 | time_scale=0.1,
31 | ):
32 | self.appr_channels = appr_channels
33 | # xyz encoding layers
34 | super().__init__(
35 | frame_info,
36 | D=D,
37 | W=W,
38 | num_freq_t=num_freq_t,
39 | skips=skips,
40 | activation=activation,
41 | time_scale=time_scale,
42 | )
43 |
44 | # output layers
45 | self.output = nn.Linear(W, appr_channels)
46 |
47 | def forward(self, t_embed):
48 | """
49 | Args:
50 |             t_embed: (..., self.W) Input time embeddings
51 | Returns:
52 | out: (..., appr_channels) Output appearance codes
53 | """
54 | t_feat = super().forward(t_embed)
55 | out = self.output(t_feat)
56 | return out
57 |
--------------------------------------------------------------------------------
/lab4d/nnutils/intrinsics.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 |
6 | from lab4d.nnutils.time import TimeMLP
7 |
8 |
9 | class IntrinsicsMLP(TimeMLP):
10 | """Encode camera intrinsics over time with an MLP
11 |
12 | Args:
13 | intrinsics: (N,4) Camera intrinsics (fx, fy, cx, cy)
14 | frame_info (Dict): Metadata about the frames in a dataset
15 | D (int): Number of linear layers
16 | W (int): Number of hidden units in each MLP layer
17 | num_freq_t (int): Number of frequencies in the time embedding
18 | skips (List(int)): List of layers to add skip connections at
19 | activation (Function): Activation function to use (e.g. nn.ReLU())
20 | time_scale (float): Control the sensitivity to time by scaling.
21 | Lower values make the module less sensitive to time.
22 | """
23 |
24 | def __init__(
25 | self,
26 | intrinsics,
27 | frame_info=None,
28 | D=5,
29 | W=256,
30 | num_freq_t=0,
31 | skips=[],
32 | activation=nn.ReLU(True),
33 | time_scale=0.1,
34 | ):
35 | if frame_info is None:
36 | num_frames = len(intrinsics)
37 | frame_info = {
38 | "frame_offset": np.asarray([0, num_frames]),
39 | "frame_mapping": list(range(num_frames)),
40 | "frame_offset_raw": np.asarray([0, num_frames]),
41 | }
42 | # xyz encoding layers
43 | super().__init__(
44 | frame_info,
45 | D=D,
46 | W=W,
47 | num_freq_t=num_freq_t,
48 | skips=skips,
49 | activation=activation,
50 | time_scale=time_scale,
51 | )
52 |
53 | # output layers
54 | self.focal = nn.Sequential(
55 | nn.Linear(W, W // 2),
56 | activation,
57 | nn.Linear(W // 2, 2),
58 | )
59 |
60 | # camera intrinsics: fx,fy,px,py
61 | self.base_logfocal = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2))
62 | self.base_ppoint = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2))
63 | self.register_buffer(
64 | "init_vals", torch.tensor(intrinsics, dtype=torch.float32), persistent=False
65 | )
66 |
67 | def mlp_init(self):
68 | """Initialize camera intrinsics from external values"""
69 | intrinsics = self.init_vals
70 | frame_offset = self.get_frame_offset()
71 | self.base_logfocal.data = intrinsics[frame_offset[:-1], :2].log()
72 | self.base_ppoint.data = intrinsics[frame_offset[:-1], 2:]
73 | super().mlp_init(termination_loss=1.0)
74 |
75 | def forward(self, t_embed):
76 | """
77 | Args:
78 | t_embed: (..., self.W) Input Fourier time embeddings
79 | Returns:
80 | out: (..., 4) Camera intrinsics
81 | """
82 | t_feat = super().forward(t_embed)
83 | focal = self.focal(t_feat).exp()
84 | return focal
85 |
86 | def get_vals(self, frame_id=None):
87 | """Compute camera intrinsics at the given frames.
88 |
89 | Args:
90 | frame_id: (...,) Frame id. If None, compute at all frames
91 | Returns:
92 | intrinsics: (..., 4) Output camera intrinsics
93 | """
94 | t_embed = self.time_embedding(frame_id)
95 | focal = self.forward(t_embed)
96 | if frame_id is None:
97 | inst_id = self.time_embedding.frame_to_vid
98 | else:
99 | inst_id = self.time_embedding.raw_fid_to_vid[frame_id]
100 | base_focal = self.base_logfocal[inst_id].exp()
101 | base_ppoint = self.base_ppoint[inst_id]
102 | focal = focal * base_focal
103 | # force square pixels
104 | focal[..., :] = (focal + focal.flip(-1)) / 2
105 | ppoint = base_ppoint.expand_as(focal)
106 | intrinsics = torch.cat([focal, ppoint], dim=-1)
107 | return intrinsics
108 |
--------------------------------------------------------------------------------
/lab4d/nnutils/time.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | from lab4d.nnutils.base import BaseMLP
8 | from lab4d.nnutils.embedding import PosEmbedding, TimeEmbedding, get_fourier_embed_dim
9 |
10 |
11 | class TimeMLP(BaseMLP):
12 | """MLP that encodes a quantity over time.
13 |
14 | Args:
15 | frame_info (Dict): Metadata about the frames in a dataset
16 | D (int): Number of linear layers
17 | W (int): Number of hidden units in each MLP layer
18 | num_freq_t (int): Number of frequencies in the time embedding
19 | skips (List(int)): List of layers to add skip connections at
20 | activation (Function): Activation function to use (e.g. nn.ReLU())
21 | time_scale (float): Control the sensitivity to time by scaling.
22 | Lower values make the module less sensitive to time.
23 | """
24 |
25 | def __init__(
26 | self,
27 | frame_info,
28 | D=5,
29 | W=256,
30 | num_freq_t=6,
31 | skips=[],
32 | activation=nn.ReLU(True),
33 | time_scale=1.0,
34 | ):
35 | frame_offset = frame_info["frame_offset"]
36 | # frame_offset_raw = frame_info["frame_offset_raw"]
37 | if num_freq_t > 0:
38 | max_ts = (frame_offset[1:] - frame_offset[:-1]).max()
39 | # scale according to input frequency: num_frames = 64 -> freq = 6
40 | num_freq_t = np.log2(max_ts / 64) + num_freq_t
41 | # # scale according to input frequency: num_frames = 512 -> freq = 6
42 | # num_freq_t = np.log2(max_ts / 512) + num_freq_t
43 | num_freq_t = int(np.rint(num_freq_t))
44 | # print("max video len: %d, override num_freq_t to %d" % (max_ts, num_freq_t))
45 |
46 | super().__init__(
47 | D=D,
48 | W=W,
49 | in_channels=W,
50 | out_channels=W,
51 | skips=skips,
52 | activation=activation,
53 | final_act=True,
54 | )
55 |
56 | self.time_embedding = TimeEmbedding(
57 | num_freq_t, frame_info, out_channels=W, time_scale=time_scale
58 | )
59 |
60 | def loss_fn(y):
61 | x = self.get_vals()
62 | return F.mse_loss(x, y)
63 |
64 | self.loss_fn = loss_fn
65 |
66 | def forward(self, t_embed):
67 | """
68 | Args:
69 | t_embed: (..., self.W) Time Fourier embeddings
70 | Returns:
71 | out: (..., self.W) Time-dependent features
72 | """
73 | t_feat = super().forward(t_embed)
74 | return t_feat
75 |
76 | def mlp_init(self, loss_fn=None, termination_loss=0.0001):
77 | """Initialize the time embedding MLP to match external priors.
78 | `self.init_vals` is defined by the child class, and could be
79 | (nframes, 4, 4) camera poses or (nframes, 4) camera intrinsics
80 | """
81 | if loss_fn is None:
82 | loss_fn = self.loss_fn
83 |
84 | optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
85 |
86 | i = 0
87 | while True:
88 | optimizer.zero_grad()
89 | loss = loss_fn(self.init_vals)
90 | loss.backward()
91 | optimizer.step()
92 | if i % 100 == 0:
93 | print(f"iter: {i}, loss: {loss.item():.4f}")
94 | i += 1
95 | if loss < termination_loss:
96 | break
97 |
98 | def compute_distance_to_prior(self):
99 | """Compute L2-distance from current SE(3) / intrinsics values to
100 | external priors.
101 |
102 | Returns:
103 | loss (0,): Mean squared error to priors
104 | """
105 | return self.loss_fn(self.init_vals)
106 |
107 | def get_vals(self, frame_id=None):
108 | """Compute values at the given frames.
109 |
110 | Args:
111 | frame_id: (...,) Frame id. If None, evaluate at all frames
112 | Returns:
113 | pred: Predicted outputs
114 | """
115 | t_embed = self.time_embedding(frame_id)
116 | pred = self.forward(t_embed)
117 | return pred
118 |
119 | def get_mean_vals(self):
120 | """Compute the mean embedding over all frames"""
121 |         device = next(self.parameters()).device
122 | t_embed = self.time_embedding.get_mean_embedding(device)
123 | pred = self.forward(t_embed)
124 | return pred
125 |
126 | def get_frame_offset(self):
127 | """Return the number of frames before the first frame of each video"""
128 | return self.time_embedding.frame_offset
129 |
--------------------------------------------------------------------------------
/lab4d/nnutils/visibility.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import torch
3 | from torch import nn
4 |
5 | from lab4d.nnutils.base import CondMLP
6 | from lab4d.nnutils.embedding import PosEmbedding
7 |
8 |
9 | class VisField(nn.Module):
10 | """Predict a visibility score (-inf to +inf) for all 3D points
11 |
12 | Args:
13 | num_inst (int): Number of distinct object instances. If --nosingle_inst
14 | is passed, this is equal to the number of videos, as we assume each
15 | video captures a different instance. Otherwise, we assume all videos
16 | capture the same instance and set this to 1.
17 | D (int): Number of linear layers
18 | W (int): Number of hidden units in each MLP layer
19 | num_freq_xyz (int): Number of frequencies in position embedding
20 | inst_channels (int): Number of channels in the instance code
21 | skips (List(int)): List of layers to add skip connections at
22 | activation (Function): Activation function to use (e.g. nn.ReLU())
23 | """
24 |
25 | def __init__(
26 | self,
27 | num_inst,
28 | D=2,
29 | W=64,
30 | num_freq_xyz=10,
31 | inst_channels=32,
32 | skips=[4],
33 | activation=nn.ReLU(True),
34 | ):
35 | super().__init__()
36 |
37 | # position and direction embedding
38 | self.pos_embedding = PosEmbedding(3, num_freq_xyz)
39 |
40 | # xyz encoding layers
41 | self.basefield = CondMLP(
42 | num_inst=num_inst,
43 | D=D,
44 | W=W,
45 | in_channels=self.pos_embedding.out_channels,
46 | inst_channels=inst_channels,
47 | out_channels=1,
48 | skips=skips,
49 | activation=activation,
50 | final_act=False,
51 | )
52 |
53 | def forward(self, xyz, inst_id=None):
54 | """
55 | Args:
56 | xyz: (..., 3), xyz coordinates
57 | inst_id: (...,) instance id, or None to use the average instance
58 | Returns:
59 | out: (..., 1), visibility score
60 | """
61 | xyz_embed = self.pos_embedding(xyz)
62 | visibility = self.basefield(xyz_embed, inst_id)
63 | return visibility
64 |
--------------------------------------------------------------------------------
/lab4d/reanimate.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python lab4d/reanimate.py --flagfile=logdir/human-48-dinov2-skel-e120/opts.log --load_suffix latest --motion_id 20 --inst_id 0
3 |
4 | import json
5 | import os
6 | import sys
7 |
8 | import numpy as np
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from absl import app, flags
12 |
13 | cwd = os.getcwd()
14 | if cwd not in sys.path:
15 | sys.path.insert(0, cwd)
16 |
17 | from lab4d.config import get_config
18 | from lab4d.render import construct_batch_from_opts, render
19 | from lab4d.utils.profile_utils import torch_profile
20 | from lab4d.utils.quat_transform import se3_to_quaternion_translation
21 |
22 | cudnn.benchmark = True
23 |
24 |
25 | class RenderFlags:
26 | """Flags for the renderer."""
27 |
28 | flags.DEFINE_integer("motion_id", 0, "motion id")
29 |
30 |
31 | def construct_batch_from_opts_reanimate(opts, model, data_info):
32 | device = "cuda"
33 | # load motion data
34 | motion_path = "%s/%s-%s/export_%04d/fg-motion.json" % (
35 | opts["logroot"],
36 | opts["seqname"],
37 | opts["logname"],
38 | opts["motion_id"],
39 | )
40 | with open(motion_path, "r") as fp:
41 | motion_data = json.load(fp)
42 | t_articulation = np.asarray(motion_data["t_articulation"])
43 | field2cam = np.asarray(motion_data["field2cam"])
44 |
45 | opts["num_frames"] = len(t_articulation)
46 |
47 | # joint angles
48 | joint_so3 = np.asarray(motion_data["joint_so3"])
49 | joint_so3 = torch.tensor(joint_so3, dtype=torch.float32, device=device)
50 |
51 | # root pose
52 | field2cam = torch.tensor(field2cam, dtype=torch.float32, device=device)
53 | field2cam = field2cam.reshape(-1, 4, 4)
54 | field2cam = se3_to_quaternion_translation(field2cam, tuple=False)
55 |
56 | batch, raw_size = construct_batch_from_opts(opts, model, data_info)
57 |
58 | batch["joint_so3"] = joint_so3
59 | batch["field2cam"] = {"fg": field2cam}
60 | return batch, raw_size
61 |
62 |
63 | def main(_):
64 | opts = get_config()
65 | render(opts, construct_batch_func=construct_batch_from_opts_reanimate)
66 |
67 |
68 | if __name__ == "__main__":
69 | app.run(main)
70 |
--------------------------------------------------------------------------------
/lab4d/tests/hat_map.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import torch
3 |
4 | from lab4d.utils.geom_utils import hat_map, so3_to_exp_map
5 |
6 |
7 | @torch.jit.script
8 | def hat(v: torch.Tensor) -> torch.Tensor:
9 | """
10 | Compute the Hat operator [1] of a batch of 3D vectors.
11 |
12 | Args:
13 | v: Batch of vectors of shape `(minibatch , 3)`.
14 |
15 | Returns:
16 | Batch of skew-symmetric matrices of shape
17 | `(minibatch, 3 , 3)` where each matrix is of the form:
18 | `[ 0 -v_z v_y ]
19 | [ v_z 0 -v_x ]
20 | [ -v_y v_x 0 ]`
21 |
22 | Raises:
23 | ValueError if `v` is of incorrect shape.
24 |
25 | [1] https://en.wikipedia.org/wiki/Hat_operator
26 | """
27 |
28 | N, dim = v.shape
29 | if dim != 3:
30 | raise ValueError("Input vectors have to be 3-dimensional.")
31 |
32 | h = torch.zeros((N, 3, 3), dtype=v.dtype, device=v.device)
33 |
34 | x, y, z = v.unbind(1)
35 |
36 | h[:, 0, 1] = -z
37 | h[:, 0, 2] = y
38 | h[:, 1, 0] = z
39 | h[:, 1, 2] = -x
40 | h[:, 2, 0] = -y
41 | h[:, 2, 1] = x
42 |
43 | return h
44 |
45 |
46 | def so3_exp_map(log_rot, eps=0.0001):
47 | """
48 | A helper function that computes the so3 exponential map and,
49 | apart from the rotation matrix, also returns intermediate variables
50 | that can be re-used in other functions.
51 | """
52 | _, dim = log_rot.shape
53 | if dim != 3:
54 | raise ValueError("Input tensor shape has to be Nx3.")
55 |
56 | nrms = (log_rot * log_rot).sum(1)
57 | # phis ... rotation angles
58 | rot_angles = torch.clamp(nrms, eps).sqrt()
59 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
60 | rot_angles_inv = 1.0 / rot_angles
61 | fac1 = rot_angles_inv * rot_angles.sin()
62 | fac2 = rot_angles_inv * rot_angles_inv * (1.0 - rot_angles.cos())
63 | skews = hat(log_rot)
64 | skews_square = torch.bmm(skews, skews)
65 |
66 | R = (
67 | fac1[:, None, None] * skews
68 | # pyre-fixme[16]: `float` has no attribute `__getitem__`.
69 | + fac2[:, None, None] * skews_square
70 | + torch.eye(3, dtype=log_rot.dtype, device=log_rot.device)[None]
71 | )
72 |
73 | return R
74 |
75 |
76 | def test_hat_map():
77 | # Define a test input tensor
78 | v = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32)
79 | # Compute the skew-symmetric matrices using the hat_map function
80 | V = hat_map(v)
81 | # Verify that the output has the correct shape
82 | assert V.shape == (3, 3, 3)
83 | # Verify that the output is correct
84 | expected_V = torch.tensor(
85 | [
86 | [[0, -3, 2], [3, 0, -1], [-2, 1, 0]],
87 | [[0, -6, 5], [6, 0, -4], [-5, 4, 0]],
88 | [[0, -9, 8], [9, 0, -7], [-8, 7, 0]],
89 | ],
90 | dtype=torch.float32,
91 | )
92 | if not torch.allclose(V, expected_V):
93 | print("Computed output:")
94 | print(V)
95 | print("Expected output:")
96 | print(expected_V)
97 | assert torch.allclose(V, expected_V)
98 |
99 |
100 | def test_so3_to_exp_map():
101 | so3 = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])
102 | exp_map = so3_exp_map(so3)
103 | computed_exp_map = so3_to_exp_map(so3)
104 | if not torch.allclose(computed_exp_map, exp_map):
105 | print("Computed output:")
106 | print(computed_exp_map)
107 | print("Expected output:")
108 | print(exp_map)
109 |
110 |
111 | if __name__ == "__main__":
112 |     test_so3_to_exp_map()
113 |     test_hat_map()
--------------------------------------------------------------------------------
/lab4d/tests/test_gpu_map.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University.
2 | import numpy as np
3 | import torch
4 |
5 | from lab4d.tests.utils import check_func
6 | from lab4d.utils.gpu_utils import gpu_map
7 |
8 |
9 | def func(arg1, arg2):
10 | x = torch.ones(arg1, arg2, dtype=torch.int64, device="cuda")
11 | return int(torch.sum(x))
12 |
13 |
14 | def test_gpu_map_static(n_elts):
15 | """Test utils/proc_utils.py::gpu_map_static"""
16 |
17 | def impl1(n_elts):
18 | return [(i + 1) * (i + 2) for i in range(n_elts)]
19 |
20 | def impl2(n_elts):
21 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="static")
22 |
23 | check_func(impl1, impl2, (n_elts,), name="gpu_map_static", niters=1)
24 |
25 |
26 | def test_gpu_map_dynamic(n_elts):
27 | """Test utils/proc_utils.py::gpu_map_dynamic"""
28 |
29 | def impl1(n_elts):
30 | return [(i + 1) * (i + 2) for i in range(n_elts)]
31 |
32 | def impl2(n_elts):
33 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="dynamic")
34 |
35 | check_func(impl1, impl2, (n_elts,), name="gpu_map_dynamic", niters=1)
36 |
37 |
38 | if __name__ == "__main__":
39 | test_gpu_map_static(11)
40 | # test_gpu_map_dynamic(11)
41 |
--------------------------------------------------------------------------------
/lab4d/tests/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University.
2 | import time
3 | from statistics import mean, stdev
4 |
5 | import numpy as np
6 | import torch
7 |
8 |
9 | def check_func(func1, func2, args=(), name="", niters=100, rtol=None, atol=None):
10 | """Verify that both input functions produce identical outputs
11 |
12 | Args:
13 | func1: First function to test
14 | func2: Second function to test
15 | args: Arguments to both functions
16 | name: Name of this test
17 |         niters: Number of test iterations (default 100)
18 | rtol: Relative tolerance (by default, selected based on datatype)
19 | atol: Absolute tolerance (by default, selected based on datatype)
20 | """
21 | # Make sure cuda is already loaded
22 | torch.zeros(1, dtype=torch.float32, device="cuda")
23 |
24 | all_t1 = []
25 | all_t2 = []
26 | for i in range(niters):
27 | torch.cuda.synchronize()
28 | t1 = time.time()
29 | out1 = func1(*args)
30 | torch.cuda.synchronize()
31 | all_t1.append(time.time() - t1)
32 |
33 | torch.cuda.synchronize()
34 | t2 = time.time()
35 | out2 = func2(*args)
36 | torch.cuda.synchronize()
37 | all_t2.append(time.time() - t2)
38 |
39 | try:
40 | assert type(out1) == type(out2)
41 | if isinstance(out1, torch.Tensor) and isinstance(out2, torch.Tensor):
42 | torch.testing.assert_close(out1, out2, rtol=rtol, atol=atol)
43 | elif isinstance(out1, np.ndarray) and isinstance(out2, np.ndarray):
44 | np.testing.assert_allclose(out1, out2, rtol=rtol, atol=atol)
45 | else:
46 | assert all(
47 | elt1 == elt2 for elt1, elt2 in zip(out1, out2)
48 | ), f"out1={out1} but out2={out2}"
49 | except Exception as e:
50 | print(f"Error: {e}")
51 |
52 | all_t1 = all_t1[10:] # Remove the first few iterations to account for warmup
53 | all_t2 = all_t2[10:]
54 | avg_t1 = 1000 * mean(all_t1) # milliseconds
55 | avg_t2 = 1000 * mean(all_t2)
56 | std_t1 = 1000 * stdev(all_t1) if len(all_t1) > 1 else 0
57 | std_t2 = 1000 * stdev(all_t2) if len(all_t1) > 1 else 0
58 |
59 | print(
60 | f"Test '{name}' passed:\tavg_t1={avg_t1:.2f}ms,\tavg_t2={avg_t2:.2f}ms,"
61 | f"\tstd_t1={std_t1:.2f}ms,\tstd_t2={std_t2:.2f}ms"
62 | )
63 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | quaternion.egg-info/
3 | dist/
4 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/README.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | The code is released as [dqtorch](https://github.com/MightyChaos/dqtorch).
3 | Please refer to the repository for tutorials and general use cases.
4 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/__init__.py:
--------------------------------------------------------------------------------
1 | ## Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | from .mat3x3 import mat3x3_inv
3 | from .quaternion import quaternion_conjugate, quaternion_mul
4 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/add_gcc_cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | module add gcc-6.3.0
3 | module add cuda-11.1.1
4 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/backend.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | import os
3 |
4 | from torch.utils.cpp_extension import load
5 |
6 | _src_path = os.path.dirname(os.path.abspath(__file__))
7 |
8 | nvcc_flags = [
9 | '-O3', '-std=c++14',
10 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__',
11 | ]
12 |
13 | if os.name == "posix":
14 | c_flags = ['-O3', '-std=c++14']
15 | elif os.name == "nt":
16 | c_flags = ['/O2', '/std:c++17']
17 |
18 | # find cl.exe
19 | def find_cl_path():
20 | import glob
21 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
22 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True)
23 | if paths:
24 | return paths[0]
25 |
26 | # If cl.exe is not on path, try to find it.
27 | if os.system("where cl.exe >nul 2>nul") != 0:
28 | cl_path = find_cl_path()
29 | if cl_path is None:
30 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
31 | os.environ["PATH"] += ";" + cl_path
32 |
33 | _backend = load(name='_quaternion',
34 | extra_cflags=c_flags,
35 | extra_cuda_cflags=nvcc_flags,
36 | sources=[os.path.join(_src_path, 'src', f) for f in [
37 | 'quaternion.cu',
38 | 'matinv.cu',
39 | 'bindings.cpp',
40 | ]],
41 | )
42 |
43 | __all__ = ['_backend']
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/mat3x3.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | import torch
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.cuda.amp import custom_bwd, custom_fwd
6 |
7 | try:
8 | import _quaternion as _backend
9 | except ImportError:
10 | from .backend import _backend
11 |
12 |
13 | class _Mat3x3_det(Function):
14 | @staticmethod
15 | @custom_fwd(cast_inputs=torch.float)
16 | def forward(ctx, inputs:torch.Tensor):
17 | B = inputs.shape[0]
18 | assert(inputs.shape[1] == 9)
19 | dtype = inputs.dtype
20 | device = inputs.device
21 |
22 | outputs = torch.empty(B, dtype=dtype, device=device)
23 |
24 | _backend.mat3x3_det_forward(inputs, outputs, B)
25 | ctx.save_for_backward(inputs)
26 |
27 | return outputs
28 |
29 | @staticmethod
30 | @once_differentiable
31 | @custom_bwd
32 | def backward(ctx, grad):
33 | return None
34 |
35 | _mat3x3_det = _Mat3x3_det.apply
36 | def mat3x3_det(inputs:torch.Tensor):
37 | rt_size = inputs.shape[:-2]
38 | outputs = _mat3x3_det(inputs.contiguous().view(-1,9))
39 | return outputs.view(rt_size)
40 |
41 |
42 | class _Mat3x3_scale_adjoint(Function):
43 | @staticmethod
44 | @custom_fwd(cast_inputs=torch.half)
45 | def forward(ctx, inputs:torch.Tensor, scales:torch.Tensor):
46 | B = inputs.shape[0]
47 | assert(inputs.shape[1] == 9)
48 | dtype = inputs.dtype
49 | device = inputs.device
50 | outputs = torch.empty(B, 9, dtype=dtype, device=device)
51 | _backend.mat3x3_scale_adjoint_forward(inputs, scales, outputs, B)
52 | ctx.save_for_backward(inputs, scales)
53 | return outputs
54 |
55 | @staticmethod
56 | @once_differentiable
57 | @custom_bwd
58 | def backward(ctx, *grad_outputs):
59 | return None
60 |
61 | _mat3x3_scale_adjoint = _Mat3x3_scale_adjoint.apply
62 | def mat3x3_scale_adjoint(inputs:torch.Tensor, scales:torch.Tensor):
63 | rt_size = inputs.shape
64 | outputs = _mat3x3_scale_adjoint(inputs.contiguous().view(-1,9), scales.contiguous().view(-1))
65 | return outputs.view(rt_size)
66 |
67 |
68 | class _Mat3x3_inv(Function):
69 | @staticmethod
70 | @custom_fwd(cast_inputs=torch.float)
71 | def forward(ctx, inputs:torch.Tensor):
72 | B = inputs.shape[0]
73 | assert(inputs.shape[1] == 9)
74 | dtype = inputs.dtype
75 | device = inputs.device
76 | scales = torch.empty(B, dtype=dtype, device=device)
77 | outputs = torch.empty(B, 9, dtype=dtype, device=device)
78 | _backend.mat3x3_inv_forward(inputs, outputs, scales, B)
79 | ctx.save_for_backward(outputs, scales)
80 | # print(scales)
81 | return outputs
82 |
83 | @staticmethod
84 | @once_differentiable
85 | @custom_bwd
86 | def backward(ctx, grad):
87 | inv_mats, _ = ctx.saved_tensors
88 | B = inv_mats.shape[0]
89 | assert(inv_mats.shape[1] == 9)
90 | dtype = inv_mats.dtype
91 | device = inv_mats.device
92 | grad_inputs = torch.empty(B, 9, dtype=dtype, device=device)
93 | _backend.mat3x3_inv_backward(grad, inv_mats, grad_inputs, B)
94 | return grad_inputs
95 |
96 |
97 |
98 | _mat3x3_inv = _Mat3x3_inv.apply
99 | def mat3x3_inv(inputs:torch.Tensor):
100 | rt_size = inputs.shape
101 | outputs = _mat3x3_inv(inputs.contiguous().view(-1,9))
102 | return outputs.view(rt_size)
103 |
104 | def _test_mat3x3_inv_backward(x:torch.Tensor):
105 | x_inv = mat3x3_inv(x)
106 | loss = x_inv.mean()
107 | loss.backward()
108 |
109 | def _test():
110 | import torch.utils.benchmark as benchmark
111 | N = 4096*128
112 | # N = 100
113 | x = torch.randn(N, 3, 3, requires_grad=True).float().cuda()
114 | x_det = mat3x3_det(x)
115 |
116 | # torch.autograd.gradcheck(mat3x3_inv, x)
117 |
118 | T = 100
119 | t = benchmark.Timer(
120 | stmt='mat3x3_det(x)',
121 | setup='from __main__ import mat3x3_det',
122 | globals={'x': x})
123 | print(t.timeit(T))
124 |
125 | x_adj = mat3x3_scale_adjoint(x, x_det)
126 | T = 100
127 | t = benchmark.Timer(
128 | stmt='mat3x3_scale_adjoint(x, x_det)',
129 | setup='from __main__ import mat3x3_scale_adjoint',
130 | globals={'x': x, 'x_det':x_det})
131 | print(t.timeit(T))
132 |
133 | # check correctness
134 | print(x @ x_adj)
135 |
136 | x_inv = mat3x3_inv(x)
137 | print(x @ x_inv)
138 | T = 100
139 | t = benchmark.Timer(
140 | stmt='mat3x3_inv(x)',
141 | setup='from __main__ import mat3x3_inv',
142 | globals={'x': x})
143 | print(t.timeit(T))
144 |
145 | T = 100
146 | t = benchmark.Timer(
147 | stmt='_test_mat3x3_inv_backward(x)',
148 | setup='from __main__ import _test_mat3x3_inv_backward',
149 | globals={'x': x})
150 | print(t.timeit(T))
151 |
152 |
153 | if __name__ == '__main__':
154 | _test()
155 |
156 |
157 |
158 |
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/quaternion.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | import torch
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 | from torch.cuda.amp import custom_bwd, custom_fwd
6 |
7 | try:
8 | import _quaternion as _backend
9 | except ImportError:
10 | from .backend import _backend
11 |
12 | class _Quaternion_mul_backward(Function):
13 | @staticmethod
14 | @custom_fwd(cast_inputs=torch.half)
15 | def forward(ctx, grad, inputs_1, inputs_2):
16 | B = inputs_1.shape[0] # batch size, coord dim
17 | D1 = inputs_1.shape[1]
18 | D2 = inputs_2.shape[1]
19 | dtype, device = inputs_1.dtype, inputs_1.device
20 | grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype)
21 | grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype)
22 | _backend.quaternion_mul_backward(grad, B, D1, D2, inputs_1, inputs_2, grad_inputs_1, grad_inputs_2)
23 | ctx.save_for_backward(grad, inputs_1, inputs_2)
24 | return grad_inputs_1, grad_inputs_2
25 |
26 | @staticmethod
27 | @once_differentiable
28 | @custom_bwd
29 | def backward(ctx, *grad_outputs):
30 | grad_out_1, grad_out_2 = grad_outputs
31 | grad, inputs_1, inputs_2 = ctx.saved_tensors
32 | B = inputs_1.shape[0] # batch size, coord dim
33 | D1 = inputs_1.shape[1]
34 | D2 = inputs_2.shape[1]
35 | dtype, device = inputs_1.dtype, inputs_1.device
36 | grad_grad = torch.empty(B, 4, device=device, dtype=dtype)
37 | grad_grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype)
38 | grad_grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype)
39 | _backend.quaternion_mul_backward_backward(grad_out_1, grad_out_2,
40 | B, D1, D2,
41 | grad, inputs_1, inputs_2,
42 | grad_grad, grad_grad_inputs_1, grad_grad_inputs_2)
43 | return grad_grad, grad_grad_inputs_1, grad_grad_inputs_2
44 |
45 | _quaternion_mul_backward = _Quaternion_mul_backward.apply
46 |
47 | class _Quaternion_mul(Function):
48 | @staticmethod
49 | @custom_fwd(cast_inputs=torch.half)
50 | def forward(ctx, inputs_1:torch.Tensor, inputs_2:torch.Tensor):
51 |         # inputs_1: [B, D1], inputs_2: [B, D2], quaternion components along the last dim
52 |         # RETURN: [B, 4], quaternion product, float
53 | calc_grad_inputs = inputs_1.requires_grad or inputs_2.requires_grad
54 |
55 | inputs_1 = inputs_1.contiguous()
56 | inputs_2 = inputs_2.contiguous()
57 |
58 | B = inputs_1.shape[0] # batch size, coord dim
59 | D1 = inputs_1.shape[1]
60 | D2 = inputs_2.shape[1]
61 |
62 | dtype = inputs_1.dtype
63 | device = inputs_1.device
64 |
65 | outputs = torch.empty(B, 4, dtype=dtype, device=device)
66 |
67 |
68 | _backend.quaternion_mul_forward(inputs_1, inputs_2, outputs, B, D1, D2)
69 |
70 | ctx.save_for_backward(inputs_1, inputs_2)
71 |
72 |
73 | return outputs
74 |
75 | @staticmethod
76 | @custom_bwd
77 | def backward(ctx, grad):
78 |         # grad: [B, 4]
79 |
80 | grad = grad.contiguous()
81 | inputs_1, inputs_2 = ctx.saved_tensors
82 |
83 | grad_inputs_1, grad_inputs_2 = _quaternion_mul_backward(grad, inputs_1, inputs_2)
84 |
85 | return grad_inputs_1, grad_inputs_2
86 | # else:
87 | # return None, None
88 |
89 |
90 |
91 | quaternion_mul = _Quaternion_mul.apply
92 |
93 |
94 | class _Quaternion_conjugate(torch.autograd.Function):
95 | @staticmethod
96 | @custom_fwd(cast_inputs=torch.half)
97 | def forward(ctx, inputs:torch.Tensor):
98 | B = inputs.shape[0] # batch size, coord dim
99 | outputs = torch.empty_like(inputs)
100 | _backend.quaternion_conjugate(inputs.contiguous(), B, outputs)
101 | return outputs
102 |
103 | @staticmethod
104 | @custom_bwd
105 | def backward(ctx, grad):
106 | return _Quaternion_conjugate.apply(grad)
107 |
108 |
109 | quaternion_conjugate = _Quaternion_conjugate.apply
110 |
111 |
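A usage sketch, not part of the repository file: it assumes the CUDA extension has been built via the accompanying setup.py, that the import path mirrors the file location above, and that both inputs are batches of 4-component quaternions (the component ordering is whatever the CUDA kernel defines). Both ops are differentiable, and quaternion_mul additionally supports double backward through _Quaternion_mul_backward.

import torch
import torch.nn.functional as F
from lab4d.third_party.quaternion.quaternion import quaternion_mul, quaternion_conjugate

# batches of unit quaternions on the GPU
q1 = F.normalize(torch.randn(1024, 4, device="cuda"), dim=-1).requires_grad_()
q2 = F.normalize(torch.randn(1024, 4, device="cuda"), dim=-1).requires_grad_()

q12 = quaternion_mul(q1, q2)            # (1024, 4) quaternion product
q12_conj = quaternion_conjugate(q12)    # (1024, 4) conjugate
loss = (q12 * q12_conj).sum()
loss.backward()                         # gradients flow back to q1 and q2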
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | import os
3 |
4 | from setuptools import setup
5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
6 |
7 | _src_path = os.path.dirname(os.path.abspath(__file__))
8 |
9 | nvcc_flags = [
10 | '-O3', '-std=c++14',
11 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__',
12 | ]
13 |
14 | if os.name == "posix":
15 | c_flags = ['-O3', '-std=c++14']
16 | elif os.name == "nt":
17 | c_flags = ['/O2', '/std:c++17']
18 |
19 | # find cl.exe
20 | def find_cl_path():
21 | import glob
22 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
23 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True)
24 | if paths:
25 | return paths[0]
26 |
27 | # If cl.exe is not on path, try to find it.
28 | if os.system("where cl.exe >nul 2>nul") != 0:
29 | cl_path = find_cl_path()
30 | if cl_path is None:
31 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
32 | os.environ["PATH"] += ";" + cl_path
33 |
34 | setup(
35 | name='quaternion', # package name, import this to use python API
36 | ext_modules=[
37 | CUDAExtension(
38 | name='_quaternion', # extension name, import this to use CUDA API
39 | sources=[os.path.join(_src_path, 'src', f) for f in [
40 | 'quaternion.cu',
41 | 'matinv.cu',
42 | 'bindings.cpp',
43 | ]],
44 | extra_compile_args={
45 | 'cxx': c_flags,
46 | 'nvcc': nvcc_flags,
47 | }
48 | ),
49 | ],
50 | cmdclass={
51 | 'build_ext': BuildExtension,
52 | }
53 | )
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/src/bindings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | #include <torch/extension.h>
3 |
4 | #include "quaternion.h"
5 | #include "matinv.h"
6 |
7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
8 | m.def("quaternion_mul_forward", &quaternion_mul_forward, "quaternion multiplication forward (CUDA)");
9 | m.def("quaternion_mul_backward", &quaternion_mul_backward, "quaternion multiplication backward (CUDA)");
10 | m.def("quaternion_mul_backward_backward", &quaternion_mul_backward_backward, "quaternion multiplication backward (CUDA)");
11 | m.def("quaternion_conjugate", &quaternion_conjugate, "quaternion_conjugate (CUDA)");
12 | // mat3x3 inverse
13 | m.def("mat3x3_det_forward", &mat3x3_det_forward, "mat3x3_det_forward (CUDA)");
14 | m.def("mat3x3_scale_adjoint_forward", &mat3x3_scale_adjoint_forward, "mat3x3_scale_adjoint_forward (CUDA)");
15 | m.def("mat3x3_inv_forward", &mat3x3_inv_forward, "mat3x3_inv_forward (CUDA)");
16 | m.def("mat3x3_inv_backward", &mat3x3_inv_backward, "mat3x3_inv_backward (CUDA)");
17 | }
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/src/matinv.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | # pragma once
3 |
4 | #include <stdint.h>
5 | #include <torch/torch.h>
6 |
7 | void mat3x3_det_forward(at::Tensor inputs, at::Tensor outputs,const uint32_t B);
8 | void mat3x3_scale_adjoint_forward(at::Tensor inputs, at::Tensor scales, at::Tensor outputs, const uint32_t B);
9 | void mat3x3_inv_forward(at::Tensor inputs, at::Tensor outputs, at::Tensor output_scales, const uint32_t B);
10 |
11 | void mat3x3_inv_backward(at::Tensor grad, at::Tensor inv_mats, at::Tensor grad_inputs, const uint32_t B);
--------------------------------------------------------------------------------
/lab4d/third_party/quaternion/src/quaternion.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University.
2 | # pragma once
3 |
4 | #include <stdint.h>
5 | #include <torch/torch.h>
6 |
7 | // inputs: [B, D], float, in [-1, 1]
8 | // outputs: [B, F], float
9 |
10 | // quaternion product: outputs = inputs_1 * inputs_2
11 | void quaternion_mul_forward(at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor outputs, const uint32_t B, const uint32_t D1, const uint32_t D2);
12 |
13 | // gradients of the quaternion product w.r.t. inputs_1 and inputs_2
14 | void quaternion_mul_backward(at::Tensor grad, const uint32_t B, const uint32_t D1, const uint32_t D2, at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor grad_inputs_1, at::Tensor grad_inputs_2);
15 |
16 |
17 | void quaternion_mul_backward_backward(
18 | at::Tensor grad_out_1, at::Tensor grad_out_2,
19 | const uint32_t B, const uint32_t D1, const uint32_t D2,
20 | at::Tensor grad, at::Tensor inputs_1, at::Tensor inputs_2,
21 | at::Tensor grad_grad, at::Tensor grad_grad_inputs_1, at::Tensor grad_grad_inputs_2);
22 |
23 |
24 | void quaternion_conjugate(at::Tensor inputs, const uint32_t B, at::Tensor outputs);
--------------------------------------------------------------------------------
/lab4d/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import os
3 | import sys
4 |
5 | import torch
6 | import torch.backends.cudnn as cudnn
7 | from absl import app
8 |
9 | cwd = os.getcwd()
10 | if cwd not in sys.path:
11 | sys.path.insert(0, cwd)
12 |
13 | from lab4d.config import get_config, save_config
14 | from lab4d.engine.train_utils import get_local_rank
15 | from lab4d.utils.profile_utils import record_function
16 |
17 | cudnn.benchmark = True
18 |
19 |
20 | def train_ddp(Trainer):
21 | local_rank = get_local_rank()
22 | torch.cuda.set_device(local_rank)
23 |
24 | opts = get_config()
25 | if local_rank == 0:
26 | save_config()
27 |
28 | torch.distributed.init_process_group(
29 | "nccl",
30 | init_method="env://",
31 | world_size=opts["ngpu"],
32 | rank=local_rank,
33 | )
34 |
35 | # torch.manual_seed(0)
36 | # torch.cuda.manual_seed(1)
37 | # torch.manual_seed(0)
38 |
39 | trainer = Trainer(opts)
40 | trainer.train()
41 |
42 |
43 | def main(_):
44 | from lab4d.engine.trainer import Trainer
45 |
46 | train_ddp(Trainer)
47 |
48 |
49 | if __name__ == "__main__":
50 | app.run(main)
51 |
--------------------------------------------------------------------------------
/lab4d/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/utils/__init__.py
--------------------------------------------------------------------------------
/lab4d/utils/cam_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # TODO: move camera-related utils to here
3 |
--------------------------------------------------------------------------------
/lab4d/utils/decorator.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | from functools import wraps
3 |
4 |
5 | def train_only_fields(method):
6 | """Decorator to skip the method and return an empty field list if not in
7 | training mode.
8 | """
9 |
10 | @wraps(method)
11 | def _impl(self, *method_args, **method_kwargs):
12 | if self.training:
13 | return method(self, *method_args, **method_kwargs)
14 | else:
15 | return {}
16 |
17 | return _impl
18 |
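A minimal sketch, not part of the repository file (Dummy and compute_reg_fields are made-up names): the decorated method runs normally while the owning nn.Module is in training mode and returns an empty dict at eval time.

import torch.nn as nn
from lab4d.utils.decorator import train_only_fields

class Dummy(nn.Module):
    @train_only_fields
    def compute_reg_fields(self):
        return {"reg": 1.0}

m = Dummy()
m.train()
print(m.compute_reg_fields())  # {'reg': 1.0}
m.eval()
print(m.compute_reg_fields())  # {}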
--------------------------------------------------------------------------------
/lab4d/utils/gpu_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University.
2 | import multiprocessing
3 | import os
4 |
5 |
6 | def gpu_map(func, args, gpus=None, method="static"):
7 | """Map a function over GPUs
8 |
9 | Args:
10 | func (Function): Function to parallelize
11 | args (List(Tuple)): List of argument tuples, to split evenly over GPUs
12 | gpus (List(int) or None): Optional list of GPU device IDs to use
13 | method (str): Either "static" or "dynamic" (default "static").
14 | Static assignment is the fastest if workload per task is balanced;
15 | dynamic assignment better handles tasks with uneven workload.
16 | Returns:
17 | outs (List): List of outputs
18 | """
19 | mp = multiprocessing.get_context("spawn") # spawn allows CUDA usage
20 | devices = os.getenv("CUDA_VISIBLE_DEVICES")
21 | outputs = None
22 |
23 | # Compute list of GPUs
24 | if gpus is None:
25 | if devices is None:
26 | num_gpus = int(os.popen("nvidia-smi -L | wc -l").read())
27 | gpus = list(range(num_gpus))
28 | else:
29 | gpus = [int(n) for n in devices.split(",")]
30 |
31 | # Map arguments over GPUs using static or dynamic assignment
32 | try:
33 | if method == "static":
34 | # Interleave arguments across GPUs
35 | args_by_rank = [[] for rank in range(len(gpus))]
36 | for it, arg in enumerate(args):
37 | args_by_rank[it % len(gpus)].append(arg)
38 |
39 | # Spawn processes
40 | spawned_procs = []
41 | result_queue = mp.Queue()
42 | for rank, gpu_id in enumerate(gpus):
43 | # Environment variables get copied on process creation
44 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
45 | proc_args = (func, args_by_rank[rank], rank, result_queue)
46 | proc = mp.Process(target=gpu_map_static_helper, args=proc_args)
47 | proc.start()
48 | spawned_procs.append(proc)
49 |
50 | # Wait to finish
51 | for proc in spawned_procs:
52 | proc.join()
53 |
54 | # Construct output list
55 | outputs_by_rank = {}
56 | while True:
57 | try:
58 | rank, out = result_queue.get(block=False)
59 | outputs_by_rank[rank] = out
60 | except multiprocessing.queues.Empty:
61 | break
62 |
63 | outputs = []
64 | for it in range(len(args)):
65 | rank = it % len(gpus)
66 | idx = it // len(gpus)
67 | outputs.append(outputs_by_rank[rank][idx])
68 |
69 | elif method == "dynamic":
70 | gpu_queue = mp.Queue()
71 | for gpu_id in gpus:
72 | gpu_queue.put(gpu_id)
73 |
74 | # Spawn processes as GPUs become available
75 | spawned_procs = []
76 | result_queue = mp.Queue()
77 | for it, arg in enumerate(args):
78 | # Take latest available gpu_id (blocking)
79 | gpu_id = gpu_queue.get()
80 |
81 | # Environment variables get copied on process creation
82 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
83 | proc_args = (func, arg, it, gpu_id, result_queue, gpu_queue)
84 | proc = mp.Process(target=gpu_map_dynamic_helper, args=proc_args)
85 | proc.start()
86 | spawned_procs.append(proc)
87 |
88 | # Wait to finish
89 | for proc in spawned_procs:
90 | proc.join()
91 |
92 | # Construct output list
93 | outputs_by_it = {}
94 | while True:
95 | try:
96 | it, out = result_queue.get(block=False)
97 | outputs_by_it[it] = out
98 | except multiprocessing.queues.Empty:
99 | break
100 |
101 | outputs = []
102 | for it in range(len(args)):
103 | outputs.append(outputs_by_it[it])
104 |
105 | else:
106 | raise NotImplementedError
107 |
108 | except Exception as e:
109 | pass
110 |
111 | # Restore env vars
112 | finally:
113 | if devices is not None:
114 | os.environ["CUDA_VISIBLE_DEVICES"] = devices
115 | else:
116 | del os.environ["CUDA_VISIBLE_DEVICES"]
117 | return outputs
118 |
119 |
120 | def gpu_map_static_helper(func, args, rank, result_queue):
121 | out = [func(*arg) for arg in args]
122 | result_queue.put((rank, out))
123 |
124 |
125 | def gpu_map_dynamic_helper(func, arg, it, gpu_id, result_queue, gpu_queue):
126 | out = func(*arg)
127 | gpu_queue.put(gpu_id)
128 | result_queue.put((it, out))
129 |
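A hypothetical usage sketch, not part of the repository file (render_chunk and its arguments are made up): gpu_map pins each worker process to one GPU via CUDA_VISIBLE_DEVICES and returns outputs in the same order as the input argument tuples. Because workers are spawned, the mapped function must be importable from __main__.

from lab4d.utils.gpu_utils import gpu_map

def render_chunk(seqname, start, end):
    # place per-GPU CUDA work for frames [start, end) of seqname here
    return (seqname, start, end)

if __name__ == "__main__":
    args = [("cat-pikachu-0", i, i + 8) for i in range(0, 64, 8)]
    # "dynamic" hands the next chunk to whichever GPU frees up first
    outs = gpu_map(render_chunk, args, gpus=[0, 1], method="dynamic")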
--------------------------------------------------------------------------------
/lab4d/utils/io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import glob
3 | import os
4 |
5 | import cv2
6 | import einops
7 | import imageio
8 | import numpy as np
9 |
10 | from lab4d.utils.vis_utils import img2color, make_image_grid
11 |
12 |
13 | def make_save_dir(opts, sub_dir="renderings"):
14 | """Create a subdirectory to save outputs
15 |
16 | Args:
17 | opts (Dict): Command-line options
18 | sub_dir (str): Subdirectory to create
19 | Returns:
20 | save_dir (str): Output directory
21 | """
22 | logname = "%s-%s" % (opts["seqname"], opts["logname"])
23 | save_dir = "%s/%s/%s/" % (opts["logroot"], logname, sub_dir)
24 | os.makedirs(save_dir, exist_ok=True)
25 | return save_dir
26 |
27 |
28 | def save_vid(
29 | outpath,
30 | frames,
31 | suffix=".mp4",
32 | upsample_frame=0,
33 | fps=10,
34 | target_size=None,
35 | ):
36 | """Save frames to video
37 |
38 | Args:
39 | outpath (str): Output directory
40 | frames: (N, H, W, x) Frames to output
41 | suffix (str): File type to save (".mp4" or ".gif")
42 | upsample_frame (int): Target number of frames
43 | fps (int): Target frames per second
44 | target_size: If provided, (H, W) target size of frames
45 | """
46 | # convert to 150 frames
47 | if upsample_frame < 1:
48 | upsample_frame = len(frames)
49 | frame_150 = []
50 | for i in range(int(upsample_frame)):
51 | fid = int(i / upsample_frame * len(frames))
52 | frame = frames[fid]
53 | if frame.max() <= 1:
54 | frame = frame * 255
55 | frame = frame.astype(np.uint8)
56 | if target_size is not None:
57 | frame = cv2.resize(frame, target_size[::-1])
58 | if suffix == ".gif":
59 | h, w = frame.shape[:2]
60 | fxy = np.sqrt(4e4 / (h * w))
61 | frame = cv2.resize(frame, None, fx=fxy, fy=fxy)
62 |
63 |         # resize to make divisible by macro block size = 16
64 | h, w = frame.shape[:2]
65 | h = int(np.ceil(h / 16) * 16)
66 | w = int(np.ceil(w / 16) * 16)
67 | frame = cv2.resize(frame, (w, h))
68 |
69 | frame_150.append(frame)
70 | imageio.mimsave("%s%s" % (outpath, suffix), frame_150, fps=fps)
71 |
72 |
73 | def save_rendered(rendered, save_dir, raw_size, pca_fn):
74 | """Save rendered outputs
75 |
76 | Args:
77 | rendered (Dict): Maps arbitrary keys to outputs of shape (N, H, W, x)
78 | save_dir (str): Output directory
79 | raw_size: (2,) Target height and width
80 | pca_fn (Function): Function to apply PCA on feature outputs
81 | """
82 | # save rendered images
83 | for k, v in rendered.items():
84 | n, h, w = v.shape[:3]
85 | img_grid = make_image_grid(v)
86 | img_grid = img2color(k, img_grid, pca_fn=pca_fn)
87 | img_grid = (img_grid * 255).astype(np.uint8)
88 | # cv2.imwrite("%s/%s.jpg" % (save_dir, k), img_grid[:, :, ::-1])
89 |
90 | # save video
91 | frames = einops.rearrange(img_grid, "(m h) (n w) c -> (m n) h w c", h=h, w=w)
92 | frames = frames[:n]
93 | save_vid(
94 | "%s/%s" % (save_dir, k),
95 | frames,
96 | fps=30,
97 | target_size=(raw_size[0], raw_size[1]),
98 | )
99 |
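A minimal sketch, not part of the repository file (the output path and frame contents are made up): save_vid accepts float frames in [0, 1] or uint8 frames in [0, 255], pads the spatial size to a multiple of 16, and appends the suffix to the output path.

import os
import numpy as np
from lab4d.utils.io import save_vid

os.makedirs("tmp", exist_ok=True)
frames = np.full((30, 240, 320, 3), 0.5, dtype=np.float32)  # (N, H, W, 3), mid-gray
save_vid("tmp/example", frames, suffix=".mp4", fps=10)      # writes tmp/example.mp4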
--------------------------------------------------------------------------------
/lab4d/utils/loss_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import torch
3 | import numpy as np
4 | import torch.nn.functional as F
5 |
6 |
7 | def entropy_loss(prob, dim=-1):
8 | """Compute entropy of a probability distribution
9 | In the case of skinning weights, each column is a distribution over assignment to B bones.
10 | We want to encourage low entropy, i.e. each point is assigned to fewer bones.
11 |
12 | Args:
13 | prob: (..., B) Probability distribution
14 | Returns:
15 | entropy (...,) Entropy of each distribution
16 | """
17 | entropy = -(prob * (prob + 1e-9).log()).sum(dim)
18 | return entropy
19 |
20 |
21 | def cross_entropy_skin_loss(skin):
22 | """Compute entropy of a probability distribution
23 | In the case of skinning weights, each column is a distribution over assignment to B bones.
24 | We want to encourage low entropy, i.e. each point is assigned to fewer bones.
25 |
26 | Args:
27 | skin: (..., B) un-normalized skinning weights
28 | """
29 | shape = skin.shape
30 | nbones = shape[-1]
31 | full_skin = skin.clone()
32 |
33 | # find the most likely bone assignment
34 | score, indices = skin.max(-1, keepdim=True)
35 | skin = torch.zeros_like(skin).fill_(0)
36 | skin = skin.scatter(-1, indices, torch.ones_like(score))
37 |
38 | cross_entropy = F.cross_entropy(
39 | full_skin.view(-1, nbones), skin.view(-1, nbones), reduction="none"
40 | )
41 | cross_entropy = cross_entropy.view(shape[:-1])
42 | return cross_entropy
43 |
44 |
45 | def align_vectors(v1, v2):
46 | """Return the scale that best aligns v1 to v2 in the L2 sense:
47 | min || kv1-v2 ||^2
48 |
49 | Args:
50 | v1: (...,) Source vector
51 | v2: (...,) Target vector
52 | Returns:
53 | scale_fac (1,): Scale factor
54 | """
55 | scale_fac = (v1 * v2).sum() / (v1 * v1).sum()
56 | if scale_fac < 0:
57 | scale_fac = torch.tensor([1.0], device=scale_fac.device)
58 | return scale_fac
59 |
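A minimal sketch, not part of the repository file (the tensor shapes are made up): entropy_loss scores how diffuse each point's skinning weights are, and align_vectors returns the scalar k that minimizes ||k*v1 - v2||^2, clamped to 1 when the optimum is negative.

import torch
from lab4d.utils.loss_utils import entropy_loss, align_vectors

skin = torch.softmax(torch.randn(100, 25), dim=-1)  # (points, bones) weights
ent = entropy_loss(skin)                            # (100,), lower = sparser assignment

v1 = torch.tensor([1.0, 2.0, 3.0])
v2 = 2.5 * v1
k = align_vectors(v1, v2)                           # ~2.5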
--------------------------------------------------------------------------------
/lab4d/utils/numpy_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import numpy as np
3 |
4 |
5 | def interp_wt(x, y, x2, type="linear"):
6 | """Map a scalar value from range [x0, x1] to [y0, y1] using interpolation
7 |
8 | Args:
9 | x: Input range [x0, x1]
10 | y: Output range [y0, y1]
11 | x2 (float): Scalar value in range [x0, x1]
12 | type (str): Interpolation type ("linear" or "log")
13 | Returns:
14 | y2 (float): Scalar value mapped to [y0, y1]
15 | """
16 | # Extract values from tuples
17 | x0, x1 = x
18 | y0, y1 = y
19 |
20 | # # Check if x2 is in range
21 | # if x2 < x0 or x2 > x1:
22 | # raise ValueError("x2 must be in the range [x0, x1]")
23 |
24 | if type == "linear":
25 | # Perform linear interpolation
26 | y2 = y0 + (x2 - x0) * (y1 - y0) / (x1 - x0)
27 |
28 | elif type == "log":
29 | # Transform to log space
30 | log_y0 = np.log10(y0)
31 | log_y1 = np.log10(y1)
32 |
33 | # Perform linear interpolation in log space
34 | log_y2 = log_y0 + (x2 - x0) * (log_y1 - log_y0) / (x1 - x0)
35 |
36 | # Transform back to original space
37 | y2 = 10**log_y2
38 |
39 | else:
40 | raise ValueError("interpolation_type must be 'linear' or 'log'")
41 |
42 | y2 = np.clip(y2, np.min(y), np.max(y))
43 | return y2
44 |
45 |
46 | def pca_numpy(raw_data, n_components):
47 | """Return a function that applies PCA to input data, based on the principal
48 | components of a raw data distribution.
49 |
50 | Args:
51 | raw_data (np.array): Raw data distribution, used to compute
52 | principal components.
53 | n_components (int): Number of principal components to use
54 | Returns:
55 | apply_pca_fn (Function): A function that applies PCA to input data
56 | """
57 | # center the data matrix by subtracting the mean of each feature
58 | mean = np.mean(raw_data, axis=0)
59 | centered_data = raw_data - mean
60 |
61 | # compute the covariance matrix of the centered data
62 | covariance_matrix = np.cov(centered_data.T)
63 |
64 | # compute the eigenvalues and eigenvectors of the covariance matrix
65 | eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
66 |
67 | # sort the eigenvalues in descending order and sort the eigenvectors accordingly
68 | sorted_indices = np.argsort(eigenvalues)[::-1]
69 | sorted_eigenvectors = eigenvectors[:, sorted_indices]
70 |
71 | # choose the top k eigenvectors (or all eigenvectors if k is not specified)
72 | top_eigenvectors = sorted_eigenvectors[:, :n_components]
73 |
74 | def apply_pca_fn(data, normalize=False):
75 | """
76 | Args:
77 | data (np.array): Data to apply PCA to
78 | normalize (bool): If True, normalize the data to 0,1 for visualization
79 | """
80 | shape = data.shape
81 | data = data.reshape(-1, shape[-1])
82 | data = np.dot(data - mean, top_eigenvectors)
83 |
84 | if normalize:
85 | # scale to std = 1
86 | data = data / np.sqrt(eigenvalues[sorted_indices][:n_components])
87 | data = np.clip(data, -2, 2) # clip to [-2, 2], 95.4% percentile
88 | # scale to 0,1
89 | data = (data + 2) / 4
90 |
91 | data = data.reshape(shape[:-1] + (n_components,))
92 | return data
93 |
94 | return apply_pca_fn
95 |
96 |
97 | def bilinear_interp(feat, xy_loc):
98 | """Sample from a 2D feature map using bilinear interpolation
99 |
100 | Args:
101 | feat: (H,W,x) Input feature map
102 | xy_loc: (N,2) Coordinates to sample, float
103 | Returns:
104 | feat_samp: (N,x) Sampled features
105 | """
106 | dtype = feat.dtype
107 | ul_loc = np.floor(xy_loc).astype(int) # x,y
108 | x = (xy_loc[:, 0] - ul_loc[:, 0])[:, None] # (N, 1)
109 | y = (xy_loc[:, 1] - ul_loc[:, 1])[:, None] # (N, 1)
110 | ul_loc = np.clip(ul_loc, 0, 110) # clip
111 | q11 = feat[ul_loc[:, 1], ul_loc[:, 0]] # (N, 16)
112 | q12 = feat[ul_loc[:, 1], ul_loc[:, 0] + 1]
113 | q21 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0]]
114 | q22 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0] + 1]
115 | feat_samp = (
116 | q11 * (1 - x) * (1 - y)
117 | + q21 * (1 - x) * (y - 0)
118 | + q12 * (x - 0) * (1 - y)
119 | + q22 * (x - 0) * (y - 0)
120 | )
121 | feat_samp = feat_samp.astype(dtype)
122 | return feat_samp
123 |
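A minimal sketch, not part of the repository file (the feature dimensions are made up): interp_wt maps a scalar from an input range to an output range (here in log space), and pca_numpy fits a projection on a raw feature distribution that can then be applied to arrays with any leading shape.

import numpy as np
from lab4d.utils.numpy_utils import interp_wt, pca_numpy

w = interp_wt((0, 1), (1e-4, 1e-2), 0.5, type="log")     # 1e-3, halfway in log space

feats = np.random.randn(1000, 16)                        # raw feature distribution
pca_fn = pca_numpy(feats, n_components=3)
rgb = pca_fn(np.random.randn(8, 8, 16), normalize=True)  # (8, 8, 3), scaled to [0, 1]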
--------------------------------------------------------------------------------
/lab4d/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import torch
3 |
4 | @torch.enable_grad()
5 | def compute_gradient(fn, x):
6 | """
7 | gradient of mlp params wrt pts
8 | """
9 | x.requires_grad_(True)
10 | y = fn(x)
11 |
12 | # get gradient for each size-1 output
13 | gradients = []
14 | for i in range(y.shape[-1]):
15 | y_sub = y[..., i : i + 1]
16 | d_output = torch.ones_like(y_sub, requires_grad=False, device=y.device)
17 | gradient = torch.autograd.grad(
18 | outputs=y_sub,
19 | inputs=x,
20 | grad_outputs=d_output,
21 | create_graph=True,
22 | retain_graph=True,
23 | only_inputs=True,
24 | )[0]
25 | gradients.append(gradient[..., None])
26 | gradients = torch.cat(gradients, -1) # ...,input-dim, output-dim
27 | return gradients
28 |
29 | def frameid_to_vid(fid, frame_offset):
30 | """Given absolute frame ids [0, ..., N], compute the video id of each frame.
31 |
32 | Args:
33 | fid: (nframes,) Absolute frame ids
34 | e.g. [0, 1, 2, 3, 100, 101, 102, 103, 200, 201, 202, 203]
35 | frame_offset: (nvideos + 1,) Offset of each video
36 | e.g., [0, 100, 200, 300]
37 |     Returns:
38 |         vid: (nframes,) Maps each absolute frame id to its video id
40 | """
41 | vid = torch.zeros_like(fid)
42 | for i in range(frame_offset.shape[0] - 1):
43 | assign = torch.logical_and(fid >= frame_offset[i], fid < frame_offset[i + 1])
44 | vid[assign] = i
45 | return vid
46 |
47 |
48 | def remove_ddp_prefix(state_dict):
49 | """Remove distributed data parallel prefix from model checkpoint
50 |
51 | Args:
52 | state_dict (Dict): Model checkpoint
53 | Returns:
54 | new_state_dict (Dict): New model checkpoint
55 | """
56 | new_state_dict = {}
57 | for key, value in state_dict.items():
58 | if key.startswith("module."):
59 | new_key = key[7:] # Remove 'module.' prefix
60 | else:
61 | new_key = key
62 | new_state_dict[new_key] = value
63 | return new_state_dict
64 |
65 |
66 | def remove_state_startwith(state_dict, prefix):
67 | """Remove model parameters that start with a prefix
68 |
69 | Args:
70 | state_dict (Dict): Model checkpoint
71 | prefix (str): Prefix to filter
72 | Returns:
73 | new_state_dict (Dict): New model checkpoint
74 | """
75 | new_state_dict = {}
76 | for key, value in state_dict.items():
77 | if key.startswith(prefix):
78 | continue
79 | else:
80 | new_state_dict[key] = value
81 | return new_state_dict
82 |
83 |
84 | def remove_state_with(state_dict, string):
85 | """Remove model parameters that contain a string
86 |
87 | Args:
88 | state_dict (Dict): Model checkpoint
89 | string (str): String to filter
90 | Returns:
91 | new_state_dict (Dict): New model checkpoint
92 | """
93 | new_state_dict = {}
94 | for key, value in state_dict.items():
95 | if string in key:
96 | continue
97 | else:
98 | new_state_dict[key] = value
99 | return new_state_dict
100 |
101 |
102 | def compress_state_with(state_dict, string):
103 | """Initialize model parameters with the mean of the instance embedding if
104 | the parameter name contains a string
105 |
106 | Args:
107 | state_dict (Dict): Model checkpoint, modified in place
108 | string (str): String to filter
109 | """
110 | # init with the mean of inst_embedding
111 | for key, value in state_dict.items():
112 | if string in key:
113 | state_dict[key] = value.mean(dim=0, keepdim=True)
114 |
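A minimal sketch, not part of the repository file: frame_offset marks where each video starts in the concatenated frame index, and frameid_to_vid maps each absolute frame id to its video id.

import torch
from lab4d.utils.torch_utils import frameid_to_vid

frame_offset = torch.tensor([0, 100, 200, 300])  # three videos of 100 frames each
fid = torch.tensor([0, 99, 100, 250])
vid = frameid_to_vid(fid, frame_offset)          # tensor([0, 0, 1, 2])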
--------------------------------------------------------------------------------
/lab4d/utils/transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | from lab4d.utils.quat_transform import (
3 | dual_quaternion_apply,
4 | dual_quaternion_inverse,
5 | dual_quaternion_to_quaternion_translation,
6 | )
7 |
8 |
9 | def get_bone_coords(xyz, bone2obj):
10 | """Transform points from object canonical space to bone coordinates
11 |
12 | Args:
13 | xyz: (..., 3) Points in object canonical space
14 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3)
15 | transforms, written as dual quaternions
16 | Returns:
17 | xyz_bone: (..., B, 3) Points in bone space
18 | """
19 | # transform xyz to bone space
20 | obj2bone = dual_quaternion_inverse(bone2obj)
21 |
22 | # reshape
23 | xyz = xyz[..., None, :].expand(xyz.shape[:-1] + (bone2obj[0].shape[-2], 3)).clone()
24 | xyz_bone = dual_quaternion_apply(obj2bone, xyz)
25 | return xyz_bone
26 |
27 |
28 | def get_xyz_bone_distance(xyz, bone2obj):
29 | """Compute squared distances from points to bone centers
30 |
31 |     Args:
32 | xyz: (..., 3) Points in object canonical space
33 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3) transforms, written as dual quaternions
34 |
35 | Returns:
36 | dist2: (..., B) Squared distance to each bone center
37 | """
38 | _, center = dual_quaternion_to_quaternion_translation(bone2obj)
39 | dist2 = (xyz[..., None, :] - center).pow(2).sum(-1) # M, K
40 | return dist2
41 |
--------------------------------------------------------------------------------
/media/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/logo.png
--------------------------------------------------------------------------------
/media/teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/teaser.gif
--------------------------------------------------------------------------------
/preprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/__init__.py
--------------------------------------------------------------------------------
/preprocess/libs/__init__.py:
--------------------------------------------------------------------------------
1 | # import lab4d
2 | import os
3 | import sys
4 |
5 | sys.path.insert(
6 | 0,
7 | "%s/../../" % os.path.join(os.path.dirname(__file__)),
8 | )
9 |
10 | sys.path.insert(
11 | 0,
12 | "%s/../" % os.path.join(os.path.dirname(__file__)),
13 | )
14 |
15 | sys.path.insert(
16 | 0,
17 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)),
18 | )
19 |
--------------------------------------------------------------------------------
/preprocess/libs/geometry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # taken from Rigidmask: https://github.com/gengshan-y/rigidmask/blob/b308b5082d09926e687c55001c20def6b0708021/utils/dydepth.py#L425
3 | import os
4 | import sys
5 |
6 | import cv2
7 | import numpy as np
8 | import trimesh
9 |
10 | from lab4d.utils.profile_utils import record_function
11 |
12 | sys.path.insert(
13 | 0,
14 | "%s/../third_party/vcnplus/" % os.path.join(os.path.dirname(__file__)),
15 | )
16 |
17 | from flowutils.flowlib import warp_flow
18 |
19 |
20 | @record_function("compute_procrustes")
21 | def compute_procrustes_robust(pts0, pts1):
22 | """
23 | analytical solution of R/t from correspondence
24 | pts0: N x 3
25 | pts1: N x 3
26 | """
27 | num_samples = 2000
28 | min_samples = 10
29 | extent = (pts0.max(0) - pts0.min(0)).mean()
30 | threshold = extent * 0.05
31 |
32 | inliers = []
33 | samples = []
34 | idx_array = np.arange(pts0.shape[0])
35 | for i in range(num_samples):
36 | sample = np.random.choice(idx_array, size=min_samples, replace=False)
37 | sol = compute_procrustes(pts0[sample], pts1[sample])
38 |
39 | # evaluate inliers
40 | R, t = sol
41 | pts2 = R @ pts0.T + t[:, np.newaxis]
42 | dist = np.linalg.norm(pts2.T - pts1, 2, axis=1)
43 | inliers.append((dist < threshold).sum())
44 | samples.append(sample)
45 |
46 |     best_idx = np.argmax(inliers)  # index of the sample with the most inliers
47 | print("inlier_ratio: ", np.max(inliers) / pts0.shape[0])
48 | best_sample = samples[best_idx]
49 | sol = compute_procrustes(pts0[best_sample], pts1[best_sample])
50 | return sol
51 |
52 |
53 | @record_function("compute_procrustes")
54 | def compute_procrustes(pts0, pts1):
55 | """
56 | analytical solution of R/t from correspondence
57 | pts0: N x 3
58 | pts1: N x 3
59 | """
60 | if pts0.shape[0] < 10:
61 | print("Warning: too few points for procrustes. Return identity.")
62 | return np.eye(3), np.zeros(3)
63 | pts0_mean = np.mean(pts0, 0)
64 | pts1_mean = np.mean(pts1, 0)
65 | pts0_centered = pts0 - pts0_mean
66 | pts1_centered = pts1 - pts1_mean
67 | H = pts0_centered.T @ pts1_centered
68 | U, S, Vt = np.linalg.svd(H)
69 | R = Vt.T @ U.T
70 | if np.linalg.det(R) < 0:
71 | Vt[2, :] *= -1
72 | R = Vt.T @ U.T
73 | t = pts1_mean - R @ pts0_mean
74 |
75 | # pts2 = R @ pts0.T + t[:, np.newaxis]
76 | # pts2 = pts2.T
77 | # trimesh.Trimesh(pts0).export('tmp/0.obj')
78 | # trimesh.Trimesh(pts1).export('tmp/1.obj')
79 | # trimesh.Trimesh(pts2).export('tmp/2.obj')
80 | return R, t
81 |
82 |
83 | @record_function("two_frame_registration")
84 | def two_frame_registration(
85 | depth0, depth1, flow, K0, K1, mask, registration_type="procrustes"
86 | ):
87 | # prepare data
88 | shape = flow.shape[:2]
89 | x0, y0 = np.meshgrid(range(shape[1]), range(shape[0]))
90 | x0 = x0.astype(np.float32)
91 | y0 = y0.astype(np.float32)
92 | x1 = x0 + flow[:, :, 0]
93 | y1 = y0 + flow[:, :, 1]
94 | hp0 = np.stack((x0, y0, np.ones(x0.shape)), 0).reshape((3, -1))
95 | hp1 = np.stack((x1, y1, np.ones(x0.shape)), 0).reshape((3, -1))
96 |
97 | # use bg + valid pixels to compute R/t
98 | # valid_mask = np.logical_and(mask, flow[..., 2] > 0).flatten()
99 | valid_mask = mask.flatten()
100 | pts0 = np.linalg.inv(K0) @ hp0 * depth0.flatten()
101 | depth1_warped = warp_flow(depth1.astype(float), flow[..., :2]).flatten()
102 | pts1 = np.linalg.inv(K1) @ hp1 * depth1_warped
103 |
104 | if registration_type == "procrustes":
105 | # Procrustes
106 | valid_mask = np.logical_and(valid_mask, depth1_warped > 0)
107 | rmat, trans = compute_procrustes(pts0.T[valid_mask], pts1.T[valid_mask])
108 | # rmat, trans = compute_procrustes_robust(pts0.T[valid_mask], pts1.T[valid_mask])
109 | elif registration_type == "pnp":
110 | # PnP
111 | _, rvec, trans = cv2.solvePnP(
112 | pts0.T[valid_mask.flatten(), np.newaxis],
113 | hp1[:2].T[valid_mask.flatten(), np.newaxis],
114 | K0,
115 | 0,
116 | flags=cv2.SOLVEPNP_DLS,
117 | )
118 | _, rvec, trans = cv2.solvePnP(
119 | pts0.T[valid_mask, np.newaxis],
120 | hp1[:2].T[valid_mask, np.newaxis],
121 | K0,
122 | 0,
123 | rvec,
124 | trans,
125 | useExtrinsicGuess=True,
126 | flags=cv2.SOLVEPNP_ITERATIVE,
127 | )
128 | rmat = cv2.Rodrigues(rvec)[0]
129 | trans = trans[:, 0]
130 | else:
131 | raise NotImplementedError
132 |
133 | cam01 = np.eye(4)
134 | cam01[:3, :3] = rmat
135 | cam01[:3, 3] = trans
136 | return cam01
137 |
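A minimal sketch, not part of the repository file (the ground-truth rotation and translation are made up; importing libs.geometry assumes the preprocess environment, since the module pulls in lab4d and flowutils at import time): compute_procrustes recovers the rigid transform mapping pts0 onto pts1 in the least-squares sense.

import numpy as np
from scipy.spatial.transform import Rotation
from libs.geometry import compute_procrustes

R_gt = Rotation.from_euler("xyz", [0.1, 0.2, 0.3]).as_matrix()
t_gt = np.array([0.5, -0.2, 1.0])
pts0 = np.random.randn(100, 3)
pts1 = (R_gt @ pts0.T).T + t_gt

R_est, t_est = compute_procrustes(pts0, pts1)  # close to R_gt, t_gt
print(np.abs(R_est - R_gt).max(), np.abs(t_est - t_gt).max())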
--------------------------------------------------------------------------------
/preprocess/libs/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import cv2
3 | import numpy as np
4 | from scipy.spatial.transform import Rotation as R
5 |
6 | from lab4d.utils.profile_utils import record_function
7 |
8 |
9 | @record_function("resize_to_target")
10 | def resize_to_target(flowfw, aspect_ratio=None, is_flow=False):
11 | h, w = flowfw.shape[:2]
12 | if aspect_ratio is None:
13 | factor = np.sqrt(250 * 1000 / (h * w))
14 | th, tw = int(h * factor), int(w * factor)
15 | else:
16 | rh, rw = aspect_ratio[:2]
17 | factor = np.sqrt(250 * 1000 / (rh * rw))
18 | th, tw = int(rh * factor), int(rw * factor)
19 |
20 | factor_h = th / h
21 | factor_w = tw / w
22 |
23 | flowfw_d = cv2.resize(flowfw, (tw, th))
24 |
25 | if is_flow:
26 | flowfw_d[..., 0] *= factor_w
27 | flowfw_d[..., 1] *= factor_h
28 | return flowfw_d
29 |
30 |
31 | @record_function("reduce_component")
32 | def reduce_component(mask):
33 | dtype = mask.dtype
34 | nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(
35 | mask.astype(np.uint8), connectivity=8
36 | )
37 | if nb_components > 1:
38 | max_label, max_size = max(
39 | [(i, stats[i, cv2.CC_STAT_AREA]) for i in range(1, nb_components)],
40 | key=lambda x: x[1],
41 | )
42 | mask = (output == max_label).astype(int)
43 | mask = mask.astype(dtype)
44 | return mask
45 |
46 |
47 | def robust_rot_align(rot1, rot2):
48 | """
49 | align rot1 to rot2 using RANSAC
50 | """
51 | in_thresh = 1.0 / 4 * np.pi # 45 deg
52 | n_samples = rot2.shape[0]
53 | rots = rot2[:, :3, :3] @ rot1[:, :3, :3].transpose(0, 2, 1)
54 |
55 | inliers = []
56 | for i in range(n_samples):
57 | rots_aligned = rots[i : i + 1] @ rot1[:, :3, :3]
58 | dist = rots_aligned @ rot2[:, :3, :3].transpose(0, 2, 1)
59 | dist = R.from_matrix(dist).as_rotvec()
60 | dist = np.linalg.norm(dist, 2, axis=1)
61 | inliers.append((dist < in_thresh).sum())
62 |
63 | # Convert rotation vectors back to rotation matrices
64 | best_rot = rots[np.argmax(inliers)]
65 | # print(inliers)
66 | return best_rot
67 |
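A minimal sketch, not part of the repository file (the blob sizes are made up; the import path assumes the preprocess/ layout shown above): reduce_component keeps only the largest connected component of a binary mask, which is how camera_registration.py cleans the foreground mask.

import numpy as np
from libs.utils import reduce_component

mask = np.zeros((64, 64), dtype=np.uint8)
mask[2:6, 2:6] = 1      # small spurious blob
mask[20:50, 20:50] = 1  # large foreground blob
mask = reduce_component(mask)  # only the 30x30 blob survives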
--------------------------------------------------------------------------------
/preprocess/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/scripts/__init__.py
--------------------------------------------------------------------------------
/preprocess/scripts/camera_registration.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/camera_registration.py 2023-04-03-16-50-09-room-0000 0
3 | import glob
4 | import os
5 | import sys
6 |
7 | import cv2
8 | import numpy as np
9 | import trimesh
10 |
11 | sys.path.insert(
12 | 0,
13 | "%s/../../" % os.path.join(os.path.dirname(__file__)),
14 | )
15 |
16 | sys.path.insert(
17 | 0,
18 | "%s/../" % os.path.join(os.path.dirname(__file__)),
19 | )
20 |
21 | from libs.geometry import two_frame_registration
22 | from libs.io import flow_process, read_raw
23 | from libs.utils import reduce_component
24 |
25 | from lab4d.utils.geom_utils import K2inv, K2mat
26 | from lab4d.utils.vis_utils import draw_cams
27 |
28 |
29 | def camera_registration(seqname, component_id):
30 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname
31 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir))
32 | delta = 1
33 | crop_size = 256
34 | use_full = True
35 | registration_type = "procrustes"
36 |
37 | # get camera intrinsics
38 | raw_shape = cv2.imread(imglist[0]).shape[:2]
39 | max_l = max(raw_shape)
40 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2])
41 | Kraw = K2mat(Kraw)
42 |
43 | cam_current = np.eye(4) # scene to camera: I, R01 I, R12 R01 I, ...
44 | cams = [cam_current]
45 | for im0idx in range(len(imglist)):
46 | if im0idx + delta >= len(imglist):
47 | continue
48 |         # TODO: load cropped images directly
49 | frameid0 = int(imglist[im0idx].split("/")[-1].split(".")[0])
50 | frameid1 = int(imglist[im0idx + delta].split("/")[-1].split(".")[0])
51 | # print("%s %d %d" % (seqname, frameid0, frameid1))
52 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full)
53 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full)
54 | flow_process(data_dict0, data_dict1)
55 |
56 |         # compute intrinsics for the cropped images
57 | K0 = K2inv(data_dict0["crop2raw"]) @ Kraw
58 | K1 = K2inv(data_dict1["crop2raw"]) @ Kraw
59 |
60 | # get mask
61 | mask = data_dict0["mask"][..., 0].astype(int) == component_id
62 | if component_id > 0:
63 | # reduce the mask to the largest connected component
64 | mask = reduce_component(mask)
65 | else:
66 | # for background, additionally remove flow with low confidence
67 | mask = np.logical_and(mask, data_dict0["flow"][..., 2] > 0).flatten()
68 | cam_0_to_1 = two_frame_registration(
69 | data_dict0["depth"],
70 | data_dict1["depth"],
71 | data_dict0["flow"],
72 | K0,
73 | K1,
74 | mask,
75 | registration_type,
76 | )
77 | cam_current = cam_0_to_1 @ cam_current
78 | cams.append(cam_current)
79 |
80 | os.makedirs(imgdir.replace("JPEGImages", "Cameras"), exist_ok=True)
81 | save_path = imgdir.replace("JPEGImages", "Cameras")
82 | # for idx, img_path in enumerate(sorted(glob.glob("%s/*.jpg" % imgdir))):
83 | # frameid = int(img_path.split("/")[-1].split(".")[0])
84 | # campath = "%s/%05d-%02d.txt" % (save_path, frameid, component_id)
85 | # np.savetxt(campath, cams[idx])
86 | np.save("%s/%02d.npy" % (save_path, component_id), cams)
87 | mesh_cam = draw_cams(cams)
88 | mesh_cam.export("%s/cameras-%02d.obj" % (save_path, component_id))
89 |
90 | print("camera registration done: %s, %d" % (seqname, component_id))
91 |
92 |
93 | if __name__ == "__main__":
94 | seqname = sys.argv[1]
95 | component_id = int(sys.argv[2]) # 0: bg, 1: fg
96 |
97 | camera_registration(seqname, component_id)
98 |
--------------------------------------------------------------------------------
/preprocess/scripts/compute_diff.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/compute_diff.py database/processed/JPEGImages/Full-Resolution/cat-pikachu-0000/ database/processed/JPEGImages/Full-Resolution/2023-04-19-01-36-53-cat-pikachu-0000/
3 | import glob
4 | import sys
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | path1 = sys.argv[1]
10 | path2 = sys.argv[2]
11 |
12 | for path1, path2 in zip(
13 | sorted(glob.glob(path1 + "/*")), sorted(glob.glob(path2 + "/*"))
14 | ):
15 | print(path1, path2)
16 |
17 | if path1.endswith(".npy"):
18 | t1 = np.load(path1).astype(np.float32)
19 | t2 = np.load(path2).astype(np.float32)
20 | elif path1.endswith(".jpg"):
21 | t1 = cv2.imread(path1).astype(np.float32)
22 | t2 = cv2.imread(path2).astype(np.float32)
23 | elif path1.endswith(".txt"):
24 | t1 = np.loadtxt(path1)
25 | t2 = np.loadtxt(path2)
26 | else:
27 | raise NotImplementedError
28 |
29 | print(np.mean(np.abs(t1 - t2)))
30 |
--------------------------------------------------------------------------------
/preprocess/scripts/crop.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/crop.py $seqname 256 1
3 | import glob
4 | import os
5 | import sys
6 |
7 | import numpy as np
8 | from tqdm import tqdm
9 |
10 | sys.path.insert(
11 | 0,
12 | "%s/../" % os.path.join(os.path.dirname(__file__)),
13 | )
14 |
15 | from libs.io import flow_process, read_raw
16 |
17 |
18 | def extract_crop(seqname, crop_size, use_full):
19 | if use_full:
20 | save_prefix = "full"
21 | else:
22 | save_prefix = "crop"
23 | save_prefix = "%s-%d" % (save_prefix, crop_size)
24 |
25 | delta_list = [1, 2, 4, 8]
26 |
27 | flowfw_list = {delta: [] for delta in delta_list}
28 | flowbw_list = {delta: [] for delta in delta_list}
29 | rgb_list = []
30 | mask_list = []
31 | depth_list = []
32 | crop2raw_list = []
33 | is_detected_list = []
34 |
35 | imglist = sorted(
36 | glob.glob("database/processed/JPEGImages/Full-Resolution/%s/*.jpg" % seqname)
37 | )
38 | for im0idx in tqdm(range(len(imglist))):
39 | for delta in delta_list:
40 | if im0idx % delta != 0:
41 | continue
42 | if im0idx + delta >= len(imglist):
43 | continue
44 | # print("%s %d %d" % (seqname, frameid0, frameid1))
45 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full)
46 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full)
47 | flow_process(data_dict0, data_dict1)
48 |
49 | # save img, mask, vis2d
50 | if delta == 1:
51 | rgb_list.append(data_dict0["img"])
52 | mask_list.append(data_dict0["mask"])
53 | depth_list.append(data_dict0["depth"])
54 | crop2raw_list.append(data_dict0["crop2raw"])
55 | is_detected_list.append(data_dict0["is_detected"])
56 |
57 | if im0idx == len(imglist) - 2:
58 | rgb_list.append(data_dict1["img"])
59 | mask_list.append(data_dict1["mask"])
60 | depth_list.append(data_dict1["depth"])
61 | crop2raw_list.append(data_dict1["crop2raw"])
62 | is_detected_list.append(data_dict1["is_detected"])
63 |
64 | flowfw_list[delta].append(data_dict0["flow"])
65 | flowbw_list[delta].append(data_dict1["flow"])
66 |
67 | # save cropped data
68 | for delta in delta_list:
69 | if len(flowfw_list[delta]) == 0:
70 | continue
71 | np.save(
72 | "database/processed/FlowFW_%d/Full-Resolution/%s/%s.npy"
73 | % (delta, seqname, save_prefix),
74 | np.stack(flowfw_list[delta], 0),
75 | )
76 | np.save(
77 | "database/processed/FlowBW_%d/Full-Resolution/%s/%s.npy"
78 | % (delta, seqname, save_prefix),
79 | np.stack(flowbw_list[delta], 0),
80 | )
81 |
82 | np.save(
83 | "database/processed/JPEGImages/Full-Resolution/%s/%s.npy"
84 | % (seqname, save_prefix),
85 | np.stack(rgb_list, 0),
86 | )
87 | np.save(
88 | "database/processed/Annotations/Full-Resolution/%s/%s.npy"
89 | % (seqname, save_prefix),
90 | np.stack(mask_list, 0),
91 | )
92 |
93 | np.save(
94 | "database/processed/Depth/Full-Resolution/%s/%s.npy" % (seqname, save_prefix),
95 | np.stack(depth_list, 0),
96 | )
97 |
98 | np.save(
99 | "database/processed/Annotations/Full-Resolution/%s/%s-crop2raw.npy"
100 | % (seqname, save_prefix),
101 | np.stack(crop2raw_list, 0),
102 | )
103 |
104 | np.save(
105 | "database/processed/Annotations/Full-Resolution/%s/%s-is_detected.npy"
106 | % (seqname, save_prefix),
107 | np.stack(is_detected_list, 0),
108 | )
109 |
110 | print("crop (size: %d, full: %d) done: %s" % (crop_size, use_full, seqname))
111 |
112 |
113 | if __name__ == "__main__":
114 | seqname = sys.argv[1]
115 | crop_size = int(sys.argv[2])
116 | use_full = bool(int(sys.argv[3]))
117 |
118 | extract_crop(seqname, crop_size, use_full)
119 |
--------------------------------------------------------------------------------
/preprocess/scripts/depth.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/depth.py 2023-03-30-21-20-57-cat-pikachu-5-0000
3 | import glob
4 | import os
5 | import sys
6 |
7 | import numpy as np
8 | import torch
9 | import trimesh
10 | from PIL import Image
11 |
12 | sys.path.insert(
13 | 0,
14 | "%s/../" % os.path.join(os.path.dirname(__file__)),
15 | )
16 |
17 |
18 | from libs.utils import resize_to_target
19 |
20 |
21 | def depth2pts(depth):
22 | Kmat = np.eye(3)
23 | Kmat[0, 0] = depth.shape[0]
24 | Kmat[1, 1] = depth.shape[0]
25 | Kmat[0, 2] = depth.shape[1] / 2
26 | Kmat[1, 2] = depth.shape[0] / 2
27 |
28 | xy = np.meshgrid(np.arange(depth.shape[1]), np.arange(depth.shape[0]))
29 | hxy = np.stack(
30 | [xy[0].flatten(), xy[1].flatten(), np.ones_like(xy[0].flatten())], axis=0
31 | )
32 | hxy = np.linalg.inv(Kmat) @ hxy
33 | xyz = hxy * depth.flatten()
34 | return xyz.T
35 |
36 |
37 | def extract_depth(seqname):
38 | image_dir = "database/processed/JPEGImages/Full-Resolution/%s/" % seqname
39 | output_dir = image_dir.replace("JPEGImages", "Depth")
40 |
41 | # torch.hub.help(
42 | # "intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True
43 | # ) # Triggers fresh download of MiDaS repo
44 |
45 | model_zoe_nk = torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True)
46 | zoe = model_zoe_nk.to("cuda")
47 |
48 | os.makedirs(output_dir, exist_ok=True)
49 | for img_path in sorted(glob.glob(f"{image_dir}/*.jpg")):
50 | # print(img_path)
51 | image = Image.open(img_path)
52 | depth = zoe.infer_pil(image)
53 | depth = resize_to_target(depth, is_flow=False).astype(np.float16)
54 | out_path = f"{output_dir}/{os.path.basename(img_path).replace('.jpg', '.npy')}"
55 | np.save(out_path, depth)
56 | # pts = depth2pts(depth)
57 |
58 | print("zoe depth done: ", seqname)
59 |
60 |
61 | if __name__ == "__main__":
62 | seqname = sys.argv[1]
63 |
64 | extract_depth(seqname)
65 |
--------------------------------------------------------------------------------
/preprocess/scripts/download.py:
--------------------------------------------------------------------------------
1 | # Usage:
2 | # python preprocess/scripts/download.py
3 | import os, sys
4 | import shutil
5 | import subprocess
6 | import zipfile
7 |
8 |
9 | def download_seq(seqname):
10 | datadir = os.path.join("database", "raw", seqname)
11 | if os.path.exists(datadir):
12 | print(f"Deleting existing directory: {datadir}")
13 | shutil.rmtree(datadir)
14 |
15 | url_path = os.path.join("database", "vid_data", f"{seqname}.txt")
16 | if not os.path.exists(url_path):
17 | # specify the folder of videos
18 | print(f"URL file does not exist: {url_path}")
19 | # ask for user input
20 | vid_path = "video_folder"
21 | while not os.path.isdir(vid_path):
22 | vid_path = input("Enter the path to video folder:")
23 | # copy folder to datadir
24 | print(f"Copying from directory: {vid_path} to {datadir}")
25 | shutil.copytree(vid_path, datadir)
26 | else:
27 | with open(url_path, "r") as f:
28 | url = f.read().strip()
29 |
30 | # Download the video
31 | print(f"Downloading from URL: {url}")
32 | tmp_zip = "tmp-%s.zip" % seqname
33 | subprocess.run(
34 | ["wget", url, "-O", tmp_zip],
35 | stdout=subprocess.DEVNULL,
36 | stderr=subprocess.DEVNULL,
37 | )
38 |
39 | # Unzip the file
40 | os.makedirs(datadir)
41 | print(f"Unzipping to directory: {datadir}")
42 | with zipfile.ZipFile(tmp_zip, "r") as zip_ref:
43 | zip_ref.extractall(datadir)
44 |
45 | # Remove the zip file
46 | os.remove(tmp_zip)
47 |
48 |
49 | def main():
50 | # Get sequence name from command line arguments
51 | if len(sys.argv) > 1:
52 | seqname = sys.argv[1]
53 | download_seq(seqname)
54 | else:
55 | print("Usage: python preprocess/scripts/download.py ")
56 |
57 |
58 | if __name__ == "__main__":
59 | main()
60 |
--------------------------------------------------------------------------------
/preprocess/scripts/extract_frames.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/extract_frames.py database/raw/cat-1015/10415567.mp4 tmp/
3 | import sys
4 |
5 | import imageio
6 | import numpy as np
7 |
8 |
9 | def extract_frames(in_path, out_path):
10 | print("extracting frames: ", in_path)
11 | # Open the video file
12 | reader = imageio.get_reader(in_path)
13 |
14 | # Find the first non-black frame
15 | for i, im in enumerate(reader):
16 | if np.any(im > 0):
17 | start_frame = i
18 | break
19 |
20 | # Write the video starting from the first non-black frame
21 | count = 0
22 | for i, im in enumerate(reader):
23 | if i >= start_frame:
24 | imageio.imsave("%s/%05d.jpg" % (out_path, count), im)
25 | count += 1
26 |
27 |
28 | if __name__ == "__main__":
29 | in_path = sys.argv[1]
30 | out_path = sys.argv[2]
31 | extract_frames(in_path, out_path)
32 |
--------------------------------------------------------------------------------
/preprocess/scripts/tsdf_fusion.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/tsdf_fusion.py 2023-04-03-18-02-32-cat-pikachu-5-0000 0
3 | import glob
4 | import os
5 | import sys
6 |
7 | import cv2
8 | import numpy as np
9 | import trimesh
10 |
11 | sys.path.insert(
12 | 0,
13 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)),
14 | )
15 |
16 |
17 | sys.path.insert(
18 | 0,
19 | "%s/../" % os.path.join(os.path.dirname(__file__)),
20 | )
21 |
22 | sys.path.insert(
23 | 0,
24 | "%s/../../" % os.path.join(os.path.dirname(__file__)),
25 | )
26 |
27 | import fusion
28 | from libs.io import read_frame_data
29 |
30 | from lab4d.utils.geom_utils import K2inv, K2mat
31 | from lab4d.utils.vis_utils import draw_cams
32 |
33 | # def read_cam(imgpath, component_id):
34 | # campath = imgpath.replace("JPEGImages", "Cameras").replace(
35 | # ".jpg", "-%02d.txt" % component_id
36 | # )
37 | # scene2cam = np.loadtxt(campath)
38 | # cam2scene = np.linalg.inv(scene2cam)
39 | # return cam2scene
40 |
41 |
42 | def tsdf_fusion(seqname, component_id, crop_size=256, use_full=True):
43 | # load rgb/depth
44 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname
45 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir))
46 |
47 | # camera path
48 | save_path = imgdir.replace("JPEGImages", "Cameras")
49 | save_path = "%s/%02d.npy" % (save_path, component_id)
50 | cams_prev = np.load(save_path)
51 |
52 | # get camera intrinsics
53 | raw_shape = cv2.imread(imglist[0]).shape[:2]
54 | max_l = max(raw_shape)
55 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2])
56 | Kraw = K2mat(Kraw)
57 |
58 | # initialize volume
59 | vol_bnds = np.zeros((3, 2))
60 | for it, imgpath in enumerate(imglist[:-1]):
61 | rgb, depth, mask, crop2raw = read_frame_data(
62 | imgpath, crop_size, use_full, component_id
63 | )
64 | K0 = K2inv(crop2raw) @ Kraw
65 | # cam2scene = read_cam(imgpath, component_id)
66 | cam2scene = np.linalg.inv(cams_prev[it])
67 | depth[~mask] = 0
68 | depth[depth > 10] = 0
69 | view_frust_pts = fusion.get_view_frustum(depth, K0, cam2scene)
70 | vol_bnds[:, 0] = np.minimum(vol_bnds[:, 0], np.amin(view_frust_pts, axis=1))
71 | vol_bnds[:, 1] = np.maximum(vol_bnds[:, 1], np.amax(view_frust_pts, axis=1))
72 | tsdf_vol = fusion.TSDFVolume(vol_bnds, voxel_size=0.2, use_gpu=False)
73 |
74 | # fusion
75 | for it, imgpath in enumerate(imglist[:-1]):
76 | # print(imgpath)
77 | rgb, depth, mask, crop2raw = read_frame_data(
78 | imgpath, crop_size, use_full, component_id
79 | )
80 | K0 = K2inv(crop2raw) @ Kraw
81 | depth[~mask] = 0
82 | # cam2scene = read_cam(imgpath, component_id)
83 | cam2scene = np.linalg.inv(cams_prev[it])
84 | tsdf_vol.integrate(rgb, depth, K0, cam2scene, obs_weight=1.0)
85 |
86 | save_path = imgdir.replace("JPEGImages", "Cameras")
87 | # get mesh, compute center
88 | rt = tsdf_vol.get_mesh()
89 | verts, faces = rt[0], rt[1]
90 | mesh = trimesh.Trimesh(verts, faces)
91 | aabb = mesh.bounds
92 | center = aabb.mean(0)
93 | mesh.vertices = mesh.vertices - center[None]
94 | mesh.export("%s/mesh-%02d-centered.obj" % (save_path, component_id))
95 |
96 | # save cameras
97 | cams = []
98 | for it, imgpath in enumerate(imglist):
99 | # campath = imgpath.replace("JPEGImages", "Cameras").replace(
100 | # ".jpg", "-%02d.txt" % component_id
101 | # )
102 | # cam = np.loadtxt(campath)
103 | # shift the camera in the scene space
104 | cam = np.linalg.inv(cams_prev[it])
105 | cam[:3, 3] -= center
106 | cam = np.linalg.inv(cam)
107 | # np.savetxt(campath, cam)
108 | cams.append(cam)
109 | np.save("%s/%02d.npy" % (save_path, component_id), cams)
110 | mesh_cam = draw_cams(cams)
111 | mesh_cam.export("%s/cameras-%02d-centered.obj" % (save_path, component_id))
112 |
113 | print("tsdf fusion done: %s, %d" % (seqname, component_id))
114 |
115 |
116 | if __name__ == "__main__":
117 | seqname = sys.argv[1]
118 | component_id = int(sys.argv[2])
119 |
120 | tsdf_fusion(seqname, component_id)
121 |
--------------------------------------------------------------------------------
/preprocess/scripts/write_config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python preprocess/scripts/write_config.py ${vidname}
3 | import configparser
4 | import glob
5 | import os
6 | import sys
7 |
8 | import cv2
9 |
10 |
11 | def write_config(collection_name):
12 | min_nframe = 8
13 | imgroot = "database/processed/JPEGImages/Full-Resolution/"
14 |
15 | config = configparser.ConfigParser()
16 | config["data"] = {
17 | "init_frame": "0",
18 | "end_frame": "-1",
19 | }
20 |
21 | seqname_all = sorted(
22 | glob.glob("%s/%s-[0-9][0-9][0-9][0-9]*" % (imgroot, collection_name))
23 | )
24 | total_vid = 0
25 | for i, seqname in enumerate(seqname_all):
26 | seqname = seqname.split("/")[-1]
27 | img = cv2.imread("%s/%s/00000.jpg" % (imgroot, seqname), 0)
28 | num_fr = len(glob.glob("%s/%s/*.jpg" % (imgroot, seqname)))
29 | if num_fr < min_nframe:
30 | continue
31 |
32 | fl = max(img.shape)
33 | px = img.shape[1] // 2
34 | py = img.shape[0] // 2
35 | camtxt = [fl, fl, px, py]
36 | config["data_%d" % total_vid] = {
37 | "ks": " ".join([str(i) for i in camtxt]),
38 | "shape": " ".join([str(img.shape[0]), str(img.shape[1])]),
39 | "img_path": "database/processed/JPEGImages/Full-Resolution/%s/" % seqname,
40 | }
41 | total_vid += 1
42 |
43 | os.makedirs("database/configs", exist_ok=True)
44 | with open("database/configs/%s.config" % collection_name, "w") as configfile:
45 | config.write(configfile)
46 |
47 |
48 | if __name__ == "__main__":
49 | collection_name = sys.argv[1]
50 |
51 | write_config(collection_name)
52 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/compute_flow.sh:
--------------------------------------------------------------------------------
1 | # bash compute_flow.sh $seqname
2 | seqname=$1
3 |
4 | if [[ $seqname ]];
5 | then
6 | array=(1 2 4 8)
7 | for i in "${array[@]}"
8 | do
9 | python compute_flow.py --datapath ../../../database/processed/JPEGImages/Full-Resolution/$seqname/ --loadmodel ./vcn_rob.pth --dframe $i
10 | done
11 | fi
12 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/flowutils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/flowutils/__init__.py
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/frame_filter.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | import sys
3 | import os
4 |
5 | # insert path of current file
6 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
7 |
8 | import cv2
9 | import pdb
10 | import argparse
11 | import numpy as np
12 | import torch
13 | import torch.nn as nn
14 | import torch.nn.parallel
15 | import torch.backends.cudnn as cudnn
16 | import torch.utils.data
17 | import glob
18 | import shutil
19 |
20 | from models.VCNplus import VCN
21 | from models.inference import (
22 | load_eval_checkpoint,
23 | modify_flow_module,
24 | process_flow_input,
25 | make_disc_aux,
26 | )
27 |
28 | cudnn.benchmark = True
29 |
30 |
31 | def frame_filter(seqname, outdir):
32 | print("Filtering frames for %s" % (seqname))
33 | model_path = "./preprocess/third_party/vcnplus/vcn_rob.pth"
34 |     maxdisp = 256  # maximum disparity. Only affects the coarsest cost volume size
35 |     fac = (
36 |         1  # controls the shape of the search grid. Only affects the coarse cost volume size
37 |     )
38 |     flow_threshold = 0.05  # flow threshold that controls frame skipping
39 |     max_frames = 500  # maximum number of frames to keep (to avoid OOM in tracking, etc.)
40 |
41 | # construct model
42 | model = load_eval_checkpoint(model_path, maxdisp=maxdisp, fac=fac)
43 |
44 | # input and output images
45 | img_paths = sorted(
46 | glob.glob("%s/JPEGImagesRaw/Full-Resolution/%s/*.jpg" % (outdir, seqname))
47 | )
48 | output_path = "%s/JPEGImages/Full-Resolution/%s/" % (outdir, seqname)
49 | output_idxs = []
50 |
51 | # load image 0 and compute resize ratio
52 | img0_o = cv2.imread(img_paths[0])[:, :, ::-1]
53 | output_idxs.append(0)
54 |
55 | input_size = img0_o.shape
56 | inp_h, inp_w, _ = input_size
57 | max_res = 300 * 300
58 | res_fac = np.sqrt(max_res / (inp_h * inp_w))
59 | max_h = int(np.ceil(inp_h * res_fac / 64) * 64)
60 | max_w = int(np.ceil(inp_w * res_fac / 64) * 64)
61 |
62 | # modify flow module according to input size
63 | modify_flow_module(model, max_h, max_w)
64 | model.eval()
65 |
66 | # find adjacent frames with sufficiently large flow
67 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w)
68 | for jnx in range(1, len(img_paths)):
69 | img1_o = cv2.imread(img_paths[jnx])[:, :, ::-1]
70 | img1, img1_noaug = process_flow_input(img1_o, model.mean_R, max_h, max_w)
71 |
72 | # forward inference
73 | disc_aux = make_disc_aux(img0_noaug, max_h, max_w, input_size)
74 | with torch.no_grad():
75 | img01 = torch.cat([img0, img1], dim=0)
76 | flowfw, _, _, _ = model(img01, disc_aux) # 1, 2, max_h, max_w
77 |
78 | flowfw[:, 0:1] /= max_w
79 | flowfw[:, 1:2] /= max_h
80 |
81 | maxflow = torch.max(torch.norm(flowfw[0], p=2, dim=0)).item()
82 | # print(jnx, "%.06f" % (maxflow))
83 |
84 | if maxflow > flow_threshold:
85 | output_idxs.append(jnx)
86 | img0_o = img1_o
87 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w)
88 |
89 | if len(output_idxs) >= max_frames:
90 | break
91 |
92 | # copy selected frames to output
93 | if len(output_idxs) > 8:
94 | os.system("mkdir -p %s" % (output_path))
95 | for output_file in [f"{jnx:05d}.jpg" for jnx in output_idxs]:
96 | shutil.copy2(
97 | f"{outdir}/JPEGImagesRaw/Full-Resolution/{seqname}/{output_file}",
98 | output_path,
99 | )
100 |
101 | print("frame filtering done: %s" % seqname)
102 | else:
103 | print("lack of motion, ignored: %s" % seqname)
104 |
105 |
106 | if __name__ == "__main__":
107 | if len(sys.argv) != 3:
108 |         print(f"Usage: python {sys.argv[0]} <seqname> <outdir>")
109 | print(f"Example: python {sys.argv[0]} cat-pikachu-0-0000 'database/processed/'")
110 | exit()
111 | seqname = sys.argv[1]
112 | outdir = sys.argv[2]
113 | frame_filter(seqname, outdir)
114 |
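
The frame-selection rule above keeps a frame only when the forward flow to the previously kept frame is large enough relative to the image size; a self-contained sketch of that criterion (the flow field here is synthetic):

```python
import numpy as np

def keep_frame(flow, width, height, threshold=0.05):
    """Return True if the maximum flow magnitude exceeds `threshold` of the image size.

    `flow` is a (2, H, W) forward flow field in pixels; dividing the x/y components by
    the image width/height makes the 5% threshold resolution-independent.
    """
    normalized = np.stack([flow[0] / width, flow[1] / height])
    return float(np.linalg.norm(normalized, axis=0).max()) > threshold

# toy example: a 640x480 frame whose largest displacement is 40 pixels horizontally
flow = np.zeros((2, 480, 640))
flow[0, 0, 0] = 40.0
print(keep_frame(flow, width=640, height=480))  # True, since 40 / 640 = 0.0625 > 0.05
```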
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/models/__init__.py
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/det.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torchvision.models as models
6 | import torch
7 | import torch.nn as nn
8 | import os
9 |
10 | from .networks.msra_resnet import get_pose_net
11 | from .networks.dlav0 import get_pose_net as get_dlav0
12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
14 | from .networks.large_hourglass import get_large_hourglass_net
15 |
16 | _model_factory = {
17 | 'res': get_pose_net, # default Resnet with deconv
18 | 'dlav0': get_dlav0, # default DLAup
19 | 'dla': get_dla_dcn,
20 | 'resdcn': get_pose_net_dcn,
21 | 'hourglass': get_large_hourglass_net,
22 | }
23 |
24 | def create_model(arch, heads, head_conv,num_input):
25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
26 | arch = arch[:arch.find('_')] if '_' in arch else arch
27 | get_model = _model_factory[arch]
28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input)
29 | return model
30 |
31 | def load_model(model, model_path, optimizer=None, resume=False,
32 | lr=None, lr_step=None):
33 | start_epoch = 0
34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
36 | state_dict_ = checkpoint['state_dict']
37 | state_dict = {}
38 |
39 | # convert data_parallal to model
40 | for k in state_dict_:
41 | if k.startswith('module') and not k.startswith('module_list'):
42 | state_dict[k[7:]] = state_dict_[k]
43 | else:
44 | state_dict[k] = state_dict_[k]
45 | model_state_dict = model.state_dict()
46 |
47 | # check loaded parameters and created model parameters
48 | msg = 'If you see this, your model does not fully load the ' + \
49 | 'pre-trained weight. Please make sure ' + \
50 | 'you have correctly specified --arch xxx ' + \
51 | 'or set the correct --num_classes for your own dataset.'
52 | for k in state_dict:
53 | if k in model_state_dict:
54 | if state_dict[k].shape != model_state_dict[k].shape:
55 | print('Skip loading parameter {}, required shape{}, '\
56 | 'loaded shape{}. {}'.format(
57 | k, model_state_dict[k].shape, state_dict[k].shape, msg))
58 | state_dict[k] = model_state_dict[k]
59 | else:
60 | print('Drop parameter {}.'.format(k) + msg)
61 | for k in model_state_dict:
62 | if not (k in state_dict):
63 | print('No param {}.'.format(k) + msg)
64 | state_dict[k] = model_state_dict[k]
65 | model.load_state_dict(state_dict, strict=False)
66 |
67 | # resume optimizer parameters
68 | if optimizer is not None and resume:
69 | if 'optimizer' in checkpoint:
70 | optimizer.load_state_dict(checkpoint['optimizer'])
71 | start_epoch = checkpoint['epoch']
72 | start_lr = lr
73 | for step in lr_step:
74 | if start_epoch >= step:
75 | start_lr *= 0.1
76 | for param_group in optimizer.param_groups:
77 | param_group['lr'] = start_lr
78 | print('Resumed optimizer with start lr', start_lr)
79 | else:
80 | print('No optimizer parameters in checkpoint.')
81 | if optimizer is not None:
82 | return model, optimizer, start_epoch
83 | else:
84 | return model
85 |
86 | def save_model(path, epoch, model, optimizer=None):
87 | if isinstance(model, torch.nn.DataParallel):
88 | state_dict = model.module.state_dict()
89 | else:
90 | state_dict = model.state_dict()
91 | data = {'epoch': epoch,
92 | 'state_dict': state_dict}
93 | if not (optimizer is None):
94 | data['optimizer'] = optimizer.state_dict()
95 | torch.save(data, path)
96 |
97 |
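
A hedged usage sketch of the factory and checkpoint loader above; the `'dla_34'` architecture string, head layout, and checkpoint path are illustrative assumptions, not the exact settings used by vcnplus:

```python
# run from preprocess/third_party/vcnplus/ so that `models` is importable
from models.det import create_model, load_model

heads = {"hm": 1, "wh": 2, "reg": 2}  # hypothetical heatmap / box-size / offset heads
model = create_model("dla_34", heads, head_conv=256, num_input=3)
model = load_model(model, "./checkpoint.pth")  # tolerates missing or shape-mismatched keys
model.eval()
```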
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/det_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | def _sigmoid(x):
9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
10 | return y
11 |
12 | def _gather_feat(feat, ind, mask=None):
13 | dim = feat.size(2)
14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
15 | feat = feat.gather(1, ind)
16 | if mask is not None:
17 | mask = mask.unsqueeze(2).expand_as(feat)
18 | feat = feat[mask]
19 | feat = feat.view(-1, dim)
20 | return feat
21 |
22 | def _transpose_and_gather_feat(feat, ind):
23 | feat = feat.permute(0, 2, 3, 1).contiguous()
24 | feat = feat.view(feat.size(0), -1, feat.size(3))
25 | feat = _gather_feat(feat, ind)
26 | return feat
27 |
28 | def flip_tensor(x):
29 | return torch.flip(x, [3])
30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy()
31 | # return torch.from_numpy(tmp).to(x.device)
32 |
33 | def flip_lr(x, flip_idx):
34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
35 | shape = tmp.shape
36 | for e in flip_idx:
37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
40 |
41 | def flip_lr_off(x, flip_idx):
42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
43 | shape = tmp.shape
44 | tmp = tmp.reshape(tmp.shape[0], 17, 2,
45 | tmp.shape[2], tmp.shape[3])
46 | tmp[:, :, 0, :, :] *= -1
47 | for e in flip_idx:
48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
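
A standalone illustration of what `_transpose_and_gather_feat` computes: it flattens the spatial grid of a `(B, C, H, W)` feature map and picks one `C`-dimensional feature vector per flattened index.

```python
import torch

B, C, H, W = 1, 3, 4, 4
feat = torch.arange(B * C * H * W, dtype=torch.float32).reshape(B, C, H, W)
ind = torch.tensor([[0, 5]])  # two flattened locations in the 4x4 grid

flat = feat.permute(0, 2, 3, 1).reshape(B, H * W, C)       # (B, H*W, C)
picked = flat.gather(1, ind.unsqueeze(2).expand(B, 2, C))  # (B, 2, C), one vector per index
print(picked.shape)  # torch.Size([1, 2, 3])
```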
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/.gitignore:
--------------------------------------------------------------------------------
1 | DCNv2/build
2 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .idea
3 | *.so
4 | *.o
5 | *pyc
6 | _ext
7 | build
8 | DCNv2.egg-info
9 | dist
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/__init__.py:
--------------------------------------------------------------------------------
1 | from .dcn_v2 import *
2 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 | // modified from the CUDA version for CPU use by Daniel K. Suhendro
64 |
65 | #ifndef DCN_V2_IM2COL_CPU
66 | #define DCN_V2_IM2COL_CPU
67 |
68 | #ifdef __cplusplus
69 | extern "C"
70 | {
71 | #endif
72 |
73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
88 | const int batch_size, const int channels, const int height_im, const int width_im,
89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
91 | const int dilation_h, const int dilation_w,
92 | const int deformable_group,
93 | float *grad_offset, float *grad_mask);
94 |
95 | #ifdef __cplusplus
96 | }
97 | #endif
98 |
99 | #endif
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cpu_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cpu_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cuda/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cuda_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cuda_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/vision.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "dcn_v2.h"
3 |
4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
9 | }
10 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Charles Shang
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/README.md:
--------------------------------------------------------------------------------
1 | ## Deformable Convolutional Networks V2 with Pytorch 1.X
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python testcpu.py # run examples and gradient check on cpu
7 | python testcuda.py # run examples and gradient check on gpu
8 | ```
9 | ### Note
10 | Now the master branch is for pytorch 1.x; you can switch back to pytorch 0.4 with:
11 | ```bash
12 | git checkout pytorch_0.4
13 | ```
14 |
15 | ### Known Issues:
16 |
17 | - [x] Gradient check w.r.t offset (solved)
18 | - [ ] Backward is not reentrant (minor)
19 |
20 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
21 |
22 | Update: all gradient checks pass with **double** precision.
23 | 
24 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is
25 | very small (`<1e-7` for float, `<1e-15` for double),
26 | so it may not be a serious problem.
27 |
28 | Please post an issue or PR if you have any comments.
29 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python setup.py build develop
3 |
--------------------------------------------------------------------------------
/preprocess/third_party/vcnplus/models/networks/DCNv2/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import glob
5 |
6 | import torch
7 |
8 | from torch.utils.cpp_extension import CUDA_HOME
9 | from torch.utils.cpp_extension import CppExtension
10 | from torch.utils.cpp_extension import CUDAExtension
11 |
12 | from setuptools import find_packages
13 | from setuptools import setup
14 |
15 | requirements = ["torch", "torchvision"]
16 |
17 |
18 | def get_extensions():
19 | this_dir = os.path.dirname(os.path.abspath(__file__))
20 | extensions_dir = os.path.join(this_dir, "DCN", "src")
21 |
22 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
23 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
24 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
25 |
26 | #os.environ["CC"] = "g++"
27 | sources = main_file + source_cpu
28 | extension = CppExtension
29 | extra_compile_args = {'cxx': ['-std=c++14']}
30 | define_macros = []
31 |
32 |
33 | #if torch.cuda.is_available() and CUDA_HOME is not None:
34 | if torch.cuda.is_available():
35 | extension = CUDAExtension
36 | sources += source_cuda
37 | define_macros += [("WITH_CUDA", None)]
38 | extra_compile_args["nvcc"] = [
39 | "-DCUDA_HAS_FP16=1",
40 | "-D__CUDA_NO_HALF_OPERATORS__",
41 | "-D__CUDA_NO_HALF_CONVERSIONS__",
42 | "-D__CUDA_NO_HALF2_OPERATORS__",
43 | ]
44 | else:
45 | #raise NotImplementedError('Cuda is not available')
46 | pass
47 |
48 |
49 | sources = [os.path.join(extensions_dir, s) for s in sources]
50 | include_dirs = [extensions_dir]
51 | ext_modules = [
52 | extension(
53 | "_ext",
54 | sources,
55 | include_dirs=include_dirs,
56 | define_macros=define_macros,
57 | extra_compile_args=extra_compile_args,
58 | )
59 | ]
60 | return ext_modules
61 |
62 | setup(
63 | name="DCNv2",
64 | version="0.1",
65 | author="charlesshang",
66 | url="https://github.com/charlesshang/DCNv2",
67 | description="deformable convolutional networks",
68 | packages=find_packages(exclude=("configs", "tests",)),
69 | # install_requires=requirements,
70 | ext_modules=get_extensions(),
71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
72 | )
73 |
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/viewpoint/__init__.py
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml"
2 | MODEL:
3 | ROI_DENSEPOSE_HEAD:
4 | CSE:
5 | EMBEDDERS:
6 | "smpl_27554":
7 | TYPE: vertex_feature
8 | NUM_VERTICES: 27554
9 | FEATURE_DIM: 256
10 | FEATURES_TRAINABLE: False
11 | IS_TRAINABLE: True
12 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_smpl_27554_256.pkl"
13 | DATASETS:
14 | TRAIN:
15 | - "densepose_coco_2014_train_cse"
16 | - "densepose_coco_2014_valminusminival_cse"
17 | TEST:
18 | - "densepose_coco_2014_minival_cse"
19 | CLASS_TO_MESH_NAME_MAPPING:
20 | "0": "smpl_27554"
21 |
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
1 | VERSION: 2
2 | MODEL:
3 | META_ARCHITECTURE: "GeneralizedRCNN"
4 | BACKBONE:
5 | NAME: "build_resnet_fpn_backbone"
6 | RESNETS:
7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
8 | FPN:
9 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
10 | ANCHOR_GENERATOR:
11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
13 | RPN:
14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level
17 | # Detectron1 uses 2000 proposals per-batch,
18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
20 | POST_NMS_TOPK_TRAIN: 1000
21 | POST_NMS_TOPK_TEST: 1000
22 |
23 | DENSEPOSE_ON: True
24 | ROI_HEADS:
25 | NAME: "DensePoseROIHeads"
26 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
27 | NUM_CLASSES: 1
28 | ROI_BOX_HEAD:
29 | NAME: "FastRCNNConvFCHead"
30 | NUM_FC: 2
31 | POOLER_RESOLUTION: 7
32 | POOLER_SAMPLING_RATIO: 2
33 | POOLER_TYPE: "ROIAlign"
34 | ROI_DENSEPOSE_HEAD:
35 | NAME: "DensePoseV1ConvXHead"
36 | POOLER_TYPE: "ROIAlign"
37 | NUM_COARSE_SEGM_CHANNELS: 2
38 | PREDICTOR_NAME: "DensePoseEmbeddingPredictor"
39 | LOSS_NAME: "DensePoseCseLoss"
40 | CSE:
41 | # embedding loss, possible values:
42 | # - "EmbeddingLoss"
43 | # - "SoftEmbeddingLoss"
44 | EMBED_LOSS_NAME: "EmbeddingLoss"
45 | SOLVER:
46 | IMS_PER_BATCH: 16
47 | BASE_LR: 0.01
48 | STEPS: (60000, 80000)
49 | MAX_ITER: 90000
50 | WARMUP_FACTOR: 0.1
51 | CLIP_GRADIENTS:
52 | CLIP_TYPE: norm
53 | CLIP_VALUE: 1.0
54 | ENABLED: true
55 | NORM_TYPE: 2.0
56 | INPUT:
57 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
58 | DENSEPOSE_EVALUATION:
59 | TYPE: cse
60 | STORAGE: file
61 |
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-DensePose-RCNN-FPN-Human.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | RESNETS:
5 | DEPTH: 101
6 | ROI_DENSEPOSE_HEAD:
7 | NAME: "DensePoseDeepLabHead"
8 | CSE:
9 | EMBED_LOSS_NAME: "SoftEmbeddingLoss"
10 | SOLVER:
11 | MAX_ITER: 130000
12 | STEPS: (100000, 120000)
13 |
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | ROI_HEADS:
7 | NUM_CLASSES: 1
8 | ROI_DENSEPOSE_HEAD:
9 | NAME: "DensePoseV1ConvXHead"
10 | COARSE_SEGM_TRAINED_BY_MASKS: True
11 | CSE:
12 | EMBED_LOSS_NAME: "SoftEmbeddingLoss"
13 | EMBEDDING_DIST_GAUSS_SIGMA: 0.1
14 | GEODESIC_DIST_GAUSS_SIGMA: 0.1
15 | EMBEDDERS:
16 | "cat_5001":
17 | TYPE: vertex_feature
18 | NUM_VERTICES: 5001
19 | FEATURE_DIM: 256
20 | FEATURES_TRAINABLE: False
21 | IS_TRAINABLE: True
22 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl"
23 | "dog_5002":
24 | TYPE: vertex_feature
25 | NUM_VERTICES: 5002
26 | FEATURE_DIM: 256
27 | FEATURES_TRAINABLE: False
28 | IS_TRAINABLE: True
29 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl"
30 | "sheep_5004":
31 | TYPE: vertex_feature
32 | NUM_VERTICES: 5004
33 | FEATURE_DIM: 256
34 | FEATURES_TRAINABLE: False
35 | IS_TRAINABLE: True
36 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl"
37 | "horse_5004":
38 | TYPE: vertex_feature
39 | NUM_VERTICES: 5004
40 | FEATURE_DIM: 256
41 | FEATURES_TRAINABLE: False
42 | IS_TRAINABLE: True
43 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl"
44 | "zebra_5002":
45 | TYPE: vertex_feature
46 | NUM_VERTICES: 5002
47 | FEATURE_DIM: 256
48 | FEATURES_TRAINABLE: False
49 | IS_TRAINABLE: True
50 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl"
51 | "giraffe_5002":
52 | TYPE: vertex_feature
53 | NUM_VERTICES: 5002
54 | FEATURE_DIM: 256
55 | FEATURES_TRAINABLE: False
56 | IS_TRAINABLE: True
57 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl"
58 | "elephant_5002":
59 | TYPE: vertex_feature
60 | NUM_VERTICES: 5002
61 | FEATURE_DIM: 256
62 | FEATURES_TRAINABLE: False
63 | IS_TRAINABLE: True
64 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl"
65 | "cow_5002":
66 | TYPE: vertex_feature
67 | NUM_VERTICES: 5002
68 | FEATURE_DIM: 256
69 | FEATURES_TRAINABLE: False
70 | IS_TRAINABLE: True
71 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl"
72 | "bear_4936":
73 | TYPE: vertex_feature
74 | NUM_VERTICES: 4936
75 | FEATURE_DIM: 256
76 | FEATURES_TRAINABLE: False
77 | IS_TRAINABLE: True
78 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl"
79 | DATASETS:
80 | TRAIN:
81 | - "densepose_lvis_v1_ds1_train_v1"
82 | TEST:
83 | - "densepose_lvis_v1_ds1_val_v1"
84 | WHITELISTED_CATEGORIES:
85 | "densepose_lvis_v1_ds1_train_v1":
86 | - 943 # sheep
87 | - 1202 # zebra
88 | - 569 # horse
89 | - 496 # giraffe
90 | - 422 # elephant
91 | - 80 # cow
92 | - 76 # bear
93 | - 225 # cat
94 | - 378 # dog
95 | "densepose_lvis_v1_ds1_val_v1":
96 | - 943 # sheep
97 | - 1202 # zebra
98 | - 569 # horse
99 | - 496 # giraffe
100 | - 422 # elephant
101 | - 80 # cow
102 | - 76 # bear
103 | - 225 # cat
104 | - 378 # dog
105 | CATEGORY_MAPS:
106 | "densepose_lvis_v1_ds1_train_v1":
107 | "1202": 943 # zebra -> sheep
108 | "569": 943 # horse -> sheep
109 | "496": 943 # giraffe -> sheep
110 | "422": 943 # elephant -> sheep
111 | "80": 943 # cow -> sheep
112 | "76": 943 # bear -> sheep
113 | "225": 943 # cat -> sheep
114 | "378": 943 # dog -> sheep
115 | "densepose_lvis_v1_ds1_val_v1":
116 | "1202": 943 # zebra -> sheep
117 | "569": 943 # horse -> sheep
118 | "496": 943 # giraffe -> sheep
119 | "422": 943 # elephant -> sheep
120 | "80": 943 # cow -> sheep
121 | "76": 943 # bear -> sheep
122 | "225": 943 # cat -> sheep
123 | "378": 943 # dog -> sheep
124 | CLASS_TO_MESH_NAME_MAPPING:
125 | # Note: different classes are mapped to a single class
126 | # mesh is chosen based on GT data, so this is just some
127 | # value which has no particular meaning
128 | "0": "sheep_5004"
129 | SOLVER:
130 | MAX_ITER: 4000
131 | STEPS: (3000, 3500)
132 | DENSEPOSE_EVALUATION:
133 | EVALUATE_MESH_ALIGNMENT: True
134 |
--------------------------------------------------------------------------------
/preprocess/third_party/viewpoint/cselib.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
3 | import cv2
4 | import numpy as np
5 | import torch
6 | import torch.nn.functional as F
7 | import pdb
8 |
9 | from detectron2.config import get_cfg
10 | from detectron2.modeling import build_model
11 | from detectron2.checkpoint import DetectionCheckpointer
12 | from detectron2.structures import Boxes as create_boxes
13 |
14 | import sys
15 |
16 | sys.path.insert(0, "preprocess/third_party/detectron2/projects/DensePose/")
17 | from densepose import add_densepose_config
18 |
19 |
20 | # load model
21 | def create_cse(is_human):
22 | if is_human:
23 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml"
24 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x/250713061/model_final_1d3314.pkl"
25 | else:
26 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml"
27 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k/253498611/model_final_6d69b7.pkl"
28 |
29 | cfg = get_cfg()
30 | add_densepose_config(cfg)
31 | cfg.merge_from_file(dp_config_path)
32 | cfg.MODEL.WEIGHTS = dp_weight_path
33 | model = build_model(cfg) # returns a torch.nn.Module
34 | DetectionCheckpointer(model).load(
35 | cfg.MODEL.WEIGHTS
36 | ) # load a file, usually from cfg.MODEL.WEIGHTS
37 | return model
38 |
39 |
40 | def preprocess_image(image, mask):
41 | h, w, _ = image.shape
42 |
43 | # resize
44 | max_size = 1333
45 | if h > w:
46 | h_rszd, w_rszd = max_size, max_size * w // h
47 | else:
48 | h_rszd, w_rszd = max_size * h // w, max_size
49 | image = cv2.resize(image, (w_rszd, h_rszd))
50 | mask = cv2.resize(mask.astype(float), (w_rszd, h_rszd)).astype(np.uint8)
51 |
52 | # pad
53 | h_pad = (1 + h_rszd // 32) * 32
54 | w_pad = (1 + w_rszd // 32) * 32
55 | image_tmp = np.zeros((h_pad, w_pad, 3)).astype(np.uint8)
56 | mask_tmp = np.zeros((h_pad, w_pad)).astype(np.uint8)
57 | image_tmp[:h_rszd, :w_rszd] = image
58 | mask_tmp[:h_rszd, :w_rszd] = mask
59 | image = image_tmp
60 | mask = mask_tmp
61 |
62 | # preprocess image and box
63 | indices = np.where(mask > 0)
64 | xid = indices[1]
65 | yid = indices[0]
66 | center = ((xid.max() + xid.min()) // 2, (yid.max() + yid.min()) // 2)
67 | length = (
68 | int((xid.max() - xid.min()) * 1.0 // 2),
69 | int((yid.max() - yid.min()) * 1.0 // 2),
70 | )
71 | bbox = [center[0] - length[0], center[1] - length[1], length[0] * 2, length[1] * 2]
72 | bbox = [
73 | max(0, bbox[0]),
74 | max(0, bbox[1]),
75 | min(w_pad, bbox[0] + bbox[2]),
76 | min(h_pad, bbox[1] + bbox[3]),
77 | ]
78 | bbox_raw = bbox.copy() # bbox in the raw image coordinate
79 | bbox_raw[0] *= w / w_rszd
80 | bbox_raw[2] *= w / w_rszd
81 | bbox_raw[1] *= h / h_rszd
82 | bbox_raw[3] *= h / h_rszd
83 | return image, mask, bbox, bbox_raw
84 |
85 |
86 | def run_cse(model, image, mask):
87 | image, mask, bbox, bbox_raw = preprocess_image(image, mask)
88 |
89 | image = torch.Tensor(image).cuda().permute(2, 0, 1)[None]
90 | image = torch.stack([(x - model.pixel_mean) / model.pixel_std for x in image])
91 | pred_boxes = torch.Tensor([bbox]).cuda()
92 | pred_boxes = create_boxes(pred_boxes)
93 |
94 | # inference
95 | model.eval()
96 | with torch.no_grad():
97 | features = model.backbone(image)
98 | features = [features[f] for f in model.roi_heads.in_features]
99 | features = [model.roi_heads.decoder(features)]
100 | features_dp = model.roi_heads.densepose_pooler(features, [pred_boxes])
101 | densepose_head_outputs = model.roi_heads.densepose_head(features_dp)
102 | densepose_predictor_outputs = model.roi_heads.densepose_predictor(
103 | densepose_head_outputs
104 | )
105 | coarse_segm_resized = densepose_predictor_outputs.coarse_segm[0]
106 | embedding_resized = densepose_predictor_outputs.embedding[0]
107 |
108 | # use input mask
109 | x, y, xx, yy = bbox
110 | mask_box = mask[y:yy, x:xx]
111 | mask_box = torch.Tensor(mask_box).cuda()[None, None]
112 | mask_box = (
113 | F.interpolate(mask_box, coarse_segm_resized.shape[1:3], mode="bilinear")[0, 0]
114 | > 0
115 | )
116 |
117 | # output embedding
118 |     embedding = embedding_resized  # size does not matter for an image code
119 | embedding = embedding * mask_box.float()[None]
120 |
121 | # output dp2raw
122 | bbox_raw = np.asarray(bbox_raw)
123 | dp2raw = np.concatenate(
124 | [(bbox_raw[2:] - bbox_raw[:2]) / embedding.shape[1], bbox_raw[:2]]
125 | )
126 | return embedding, dp2raw
127 |
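
A hedged usage sketch of the two entry points above; the image and mask paths are hypothetical, and running it requires detectron2 with the DensePose project plus a CUDA GPU:

```python
import cv2

# assumes the repo root is on PYTHONPATH so this module path resolves
from preprocess.third_party.viewpoint.cselib import create_cse, run_cse

model = create_cse(is_human=False)           # quadruped CSE model
image = cv2.imread("frame.jpg")[:, :, ::-1]  # RGB frame
mask = cv2.imread("mask.png", 0) > 0         # binary foreground mask
embedding, dp2raw = run_cse(model, image, mask)
print(embedding.shape, dp2raw)  # (256, S, S) embedding grid; crop-to-raw scale and offset
```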
--------------------------------------------------------------------------------
/scripts/create_collage.py:
--------------------------------------------------------------------------------
1 | # python scripts/create_collage.py --testdir logdir/penguin-fg-skel-b120/ --prefix renderings_0002
2 |
3 | from moviepy.editor import clips_array, VideoFileClip, vfx
4 | import sys, os
5 | import numpy as np
6 | import pdb
7 | import glob
8 | import argparse
9 | import itertools
10 |
11 | parser = argparse.ArgumentParser(description="combine results into a collage")
12 | parser.add_argument("--testdir", default="", help="path to test dir")
13 | parser.add_argument(
14 | "--prefix", default="renderings_ref_", type=str, help="what data to combine"
15 | )
16 | args = parser.parse_args()
17 |
18 |
19 | def main():
20 | save_path = "%s/collage.mp4" % args.testdir
21 |
22 | video_list = []
23 | for sub_seq in sorted(glob.glob("%s/%s*" % (args.testdir, args.prefix))):
24 | path_list = []
25 | path_list.append("%s/ref/ref_rgb.mp4" % sub_seq)
26 | path_list.append("%s/ref/rgb.mp4" % sub_seq)
27 | path_list.append("%s/ref/xyz.mp4" % sub_seq)
28 | path_list.append("%s/rot-0-360/rgb.mp4" % sub_seq)
29 | path_list.append("%s/rot-0-360/xyz.mp4" % sub_seq)
30 |
31 | # make sure these exist
32 | if np.sum([os.path.exists(path) for path in path_list]) == len(path_list):
33 | print("found %s" % sub_seq)
34 | video_list.append([VideoFileClip(path) for path in path_list])
35 |
36 | if len(video_list) == 0:
37 | print("no video found")
38 | return
39 |
40 | # align in time
41 | max_duration = max(
42 | [clip.duration for clip in list(itertools.chain.from_iterable(video_list))]
43 | )
44 | for i, clip_list in enumerate(video_list):
45 | for j, clip in enumerate(clip_list):
46 | video_list[i][j] = clip.resize(width=512).fx(
47 | vfx.freeze, t="end", total_duration=max_duration, padding_end=0.5
48 | )
49 |
50 | final_clip = clips_array(video_list)
51 | final_clip.write_videofile(save_path)
52 |
53 |
54 | if __name__ == "__main__":
55 | main()
56 |
--------------------------------------------------------------------------------
/scripts/download_unzip.sh:
--------------------------------------------------------------------------------
1 | # bash scripts/download_unzip.sh "$url"
2 | url=$1
3 | rootdir=$PWD
4 |
5 | filename=tmp-`date +"%Y-%m-%d-%H-%M-%S"`.zip
6 | wget $url -O $filename
7 | unzip $filename
8 | rm $filename
9 |
--------------------------------------------------------------------------------
/scripts/install-deps.sh:
--------------------------------------------------------------------------------
1 | mim install mmcv
2 |
3 | (cd lab4d/third_party/quaternion && CUDA_HOME=$CONDA_PREFIX pip install .)
4 |
5 | mkdir ./preprocess/third_party/Track-Anything/checkpoints; wget "https://www.dropbox.com/scl/fi/o86gx6zn27b494m937n2i/E2FGVI-HQ-CVPR22.pth?rlkey=j15ue65ryy8jb1mvn2htf0jtk&st=t4zyl5jk&dl=0" -O ./preprocess/third_party/Track-Anything/checkpoints/E2FGVI-HQ-CVPR22.pth
6 |
7 | wget https://www.dropbox.com/s/bgsodsnnbxdoza3/vcn_rob.pth -O ./preprocess/third_party/vcnplus/vcn_rob.pth
8 |
9 | wget https://www.dropbox.com/s/51cjzo8zgz966t5/human.pth -O preprocess/third_party/viewpoint/human.pth
10 |
11 | wget https://www.dropbox.com/s/1464pg6c9ce8rve/quad.pth -O preprocess/third_party/viewpoint/quad.pth
12 |
--------------------------------------------------------------------------------
/scripts/render_intermediate.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
2 | # python scripts/render_intermediate.py --testdir logdir/human-48-category-comp/
3 | import sys, os
4 | import pdb
5 |
6 | os.environ["PYOPENGL_PLATFORM"] = "egl" # opengl seems to only work with TPU
7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8 |
9 | import glob
10 | import numpy as np
11 | import cv2
12 | import argparse
13 | import trimesh
14 | import pyrender
15 | from pyrender import IntrinsicsCamera, Mesh, Node, Scene, OffscreenRenderer
16 | import matplotlib
17 | import tqdm
18 |
19 | from lab4d.utils.io import save_vid
20 |
21 | cmap = matplotlib.colormaps.get_cmap("cool")
22 |
23 | parser = argparse.ArgumentParser(description="script to render cameras over epochs")
24 | parser.add_argument("--testdir", default="", help="path to test dir")
25 | parser.add_argument(
26 | "--data_class", default="fg", type=str, help="which data to render, {fg, bg}"
27 | )
28 | args = parser.parse_args()
29 |
30 | img_size = 1024
31 |
32 | # renderer
33 | r = OffscreenRenderer(img_size, img_size)
34 | cam = IntrinsicsCamera(img_size, img_size, img_size / 2, img_size / 2)
35 | # light
36 | direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=3.0)
37 | light_pose = np.asarray(
38 | [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=float
39 | )
40 | # cv to gl coords
41 | cam_pose = -np.eye(4)
42 | cam_pose[0, 0] = 1
43 | cam_pose[-1, -1] = 1
44 | rtmat = np.eye(4)
45 | # object to camera transforms
46 | rtmat[:3, :3] = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0] # bev
47 |
48 |
49 | def main():
50 | # io
51 | path_list = [
52 | i for i in glob.glob("%s/*-%s-proxy.obj" % (args.testdir, args.data_class))
53 | ]
54 | if len(path_list) == 0:
55 | print("no mesh found in %s for %s" % (args.testdir, args.data_class))
56 | return
57 | path_list = sorted(path_list, key=lambda x: int(x.split("/")[-1].split("-")[0]))
58 | outdir = "%s/renderings_proxy" % args.testdir
59 | os.makedirs(outdir, exist_ok=True)
60 |
61 | mesh_dict = {}
62 | aabb_min = np.asarray([np.inf, np.inf, np.inf])
63 | aabb_max = np.asarray([-np.inf, -np.inf, -np.inf])
64 | for mesh_path in path_list:
65 | batch_idx = int(mesh_path.split("/")[-1].split("-")[0])
66 | mesh_obj = trimesh.load(mesh_path)
67 | mesh_dict[batch_idx] = mesh_obj
68 |
69 | # update aabb
70 | aabb_min = np.minimum(aabb_min, mesh_obj.bounds[0])
71 | aabb_max = np.maximum(aabb_max, mesh_obj.bounds[1])
72 |
73 | # set camera translation
74 | rtmat[2, 3] = max(aabb_max - aabb_min) * 1.2
75 |
76 | # render
77 | frames = []
78 | for batch_idx, mesh_obj in tqdm.tqdm(mesh_dict.items()):
79 | scene = Scene(ambient_light=0.4 * np.asarray([1.0, 1.0, 1.0, 1.0]))
80 |
81 | # add object / camera
82 | mesh_obj.apply_transform(rtmat)
83 | scene.add_node(Node(mesh=Mesh.from_trimesh(mesh_obj)))
84 |
85 | # camera
86 | scene.add(cam, pose=cam_pose)
87 |
88 | # light
89 | scene.add(direc_l, pose=light_pose)
90 |
91 | # render
92 | color, depth = r.render(
93 | scene,
94 | flags=pyrender.RenderFlags.SHADOWS_DIRECTIONAL
95 | | pyrender.RenderFlags.SKIP_CULL_FACES,
96 | )
97 | # add text
98 | color = color.astype(np.uint8)
99 | color = cv2.putText(
100 | color,
101 | "batch: %02d" % batch_idx,
102 | (30, 50),
103 | cv2.FONT_HERSHEY_SIMPLEX,
104 | 2,
105 | (256, 0, 0),
106 | 2,
107 | )
108 | frames.append(color)
109 |
110 | save_vid("%s/fg" % outdir, frames, suffix=".mp4", upsample_frame=-1)
111 | print("saved to %s/fg.mp4" % outdir)
112 |
113 |
114 | if __name__ == "__main__":
115 | main()
116 |
--------------------------------------------------------------------------------
/scripts/run_crop_all.py:
--------------------------------------------------------------------------------
1 | # WIP by Gengshan Yang
2 | # TODO: use config file to go over seqs
3 | # python scripts/run_crop_all.py cat-pikachu
4 | import os
5 | import sys
6 | import glob
7 | import multiprocessing
8 | from functools import partial
9 |
10 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
11 | from preprocess.scripts.crop import extract_crop
12 |
13 | os.environ["OMP_NUM_THREADS"] = "1"
14 |
15 | vidname = sys.argv[1]
16 | path = (
17 | "database/processed/JPEGImages/Full-Resolution/%s*" % vidname
18 | ) # path to the images
19 |
20 |
21 | def process_seqname(seqname, size, region):
22 | extract_crop(seqname, size, region)
23 |
24 |
25 | if __name__ == "__main__":
26 | pool = multiprocessing.Pool(processes=32) # use up to 32 processes
27 |
28 | for seqname in sorted(glob.glob(path)):
29 | seqname = seqname.split("/")[-1]
30 | # we'll use a partial function to bind the common arguments
31 | func = partial(process_seqname, seqname, 256)
32 | pool.apply_async(func, args=(0,))
33 | pool.apply_async(func, args=(1,))
34 |
35 | pool.close()
36 | pool.join() # wait for all processes to finish
37 |
--------------------------------------------------------------------------------
/scripts/run_rendering_parallel.py:
--------------------------------------------------------------------------------
1 | # WIP by Gengshan Yang
2 | # generate three visualizations (reference view, bev, turntable) rendering, mesh export in parallel
3 | # python scripts/run_rendering_parallel.py logdir/dog-98-category-comp/opts.log 0-2 0,1,2
4 | import sys
5 | import subprocess
6 |
7 | # Set the flagfile.
8 | flagfile = sys.argv[1]
9 |
10 | # Set the range of inst_ids.
11 | start_inst_id, end_inst_id = map(int, sys.argv[2].split("-"))
12 | id_list = list(range(start_inst_id, end_inst_id + 1))
13 |
14 | # Set the devices id
15 | dev_list = sys.argv[3].split(",")
16 | dev_list = list(map(int, dev_list))
17 | num_devices = len(dev_list)
18 | id_per_device = len(id_list) // num_devices
19 |
20 | print(
21 | "rendering videos",
22 | id_list,
23 | "on devices",
24 | dev_list,
25 | )
26 |
27 | # render proxy over rounds
28 | logdir = flagfile.rsplit("/", 1)[0]
29 | subprocess.Popen(
30 | f"python scripts/render_intermediate.py --testdir {logdir}/", shell=True
31 | )
32 |
33 | # Loop over each device.
34 | for dev_id, device in enumerate(dev_list):
35 | # Initialize an empty command list for this device.
36 | command_for_device = []
37 |
38 | # Loop over the inst_ids assigned to this device.
39 | if dev_id == num_devices - 1:
40 | assigned_ids = id_list[dev_id * id_per_device :]
41 | else:
42 | assigned_ids = id_list[dev_id * id_per_device : (dev_id + 1) * id_per_device]
43 | for inst_id in assigned_ids:
44 | # Add the command for this inst_id to the device's command list.
45 | command_for_device.append(
46 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --data_prefix full"
47 | )
48 | command_for_device.append(
49 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint rot-0-360"
50 | )
51 | # command_for_device.append(
52 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint bev-90"
53 | # )
54 | # command_for_device.append(
55 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/export.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id}"
56 | # )
57 |
58 | # Add a delay between commands to avoid overloading the device.
59 | command_for_device.append("sleep 1")
60 |
61 | # Join all commands for this device into a single string.
62 | command_str = "; ".join(command_for_device)
63 |
64 | # Start a screen session for this device, executing the device's command string.
65 | subprocess.Popen(
66 | f'screen -S render-{device}-{",".join(str(i) for i in assigned_ids)} -d -m bash -c "{command_str}"',
67 | shell=True,
68 | )
69 |
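
The id-to-device split above assigns `len(id_list) // num_devices` instance ids to each device and hands any remainder to the last one; a quick check of that arithmetic with hypothetical arguments `0-4` and `0,1`:

```python
id_list = [0, 1, 2, 3, 4]   # from "0-4"
dev_list = [0, 1]           # from "0,1"
id_per_device = len(id_list) // len(dev_list)
for dev_id, device in enumerate(dev_list):
    if dev_id == len(dev_list) - 1:
        assigned = id_list[dev_id * id_per_device:]
    else:
        assigned = id_list[dev_id * id_per_device:(dev_id + 1) * id_per_device]
    print(device, assigned)  # 0 [0, 1]  then  1 [2, 3, 4]
```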
--------------------------------------------------------------------------------
/scripts/train.sh:
--------------------------------------------------------------------------------
1 | # bash scripts/train.sh lab4d/train.py 0 --seqname 2023-03-26-00-39-17-cat-pikachu
2 | main_func=$1
3 | dev=$2
4 | add_args=${*: 3:$#-1}
5 |
6 | ngpu=`echo $dev | awk -F '[\t,]' '{print NF-1}'`
7 | ngpu=$(($ngpu + 1 ))
8 | echo "using "$ngpu "gpus"
9 |
10 | # assign random port
11 | # https://github.com/pytorch/pytorch/issues/73320
12 | CUDA_VISIBLE_DEVICES=$dev torchrun \
13 | --nproc_per_node $ngpu --nnodes 1 --rdzv_backend c10d --rdzv_endpoint localhost:0 \
14 | $main_func \
15 | --ngpu $ngpu \
16 | $add_args
17 |
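
The awk line derives the GPU count from the comma-separated device list in `$dev`; the same arithmetic in Python, for a hypothetical device string:

```python
dev = "0,2,3"              # CUDA_VISIBLE_DEVICES-style device list
ngpu = dev.count(",") + 1  # mirrors: echo $dev | awk -F '[\t,]' '{print NF-1}', then +1
assert ngpu == len(dev.split(","))
print(ngpu)  # 3
```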
--------------------------------------------------------------------------------
/scripts/zip_dataset.py:
--------------------------------------------------------------------------------
1 | # Description: Zip the dataset for easy sharing
2 | # Usage: python scripts/zip_dataset.py <vidname>
3 | import configparser
4 | import os
5 | import sys
6 |
7 | cwd = os.getcwd()
8 | if cwd not in sys.path:
9 | sys.path.insert(0, cwd)
10 |
11 | from preprocess.libs.io import run_bash_command
12 |
13 | vidname = sys.argv[1]
14 |
15 | args = []
16 | config = configparser.RawConfigParser()
17 | config.read("database/configs/%s.config" % vidname)
18 | for vidid in range(len(config.sections()) - 1):
19 | seqname = config.get("data_%d" % vidid, "img_path").strip("/").split("/")[-1]
20 | run_bash_command(
21 | f"zip {vidname}.zip -r database/processed/*/Full-Resolution/{seqname}"
22 | )
23 |
24 | run_bash_command(f"zip {vidname}.zip database/configs/{vidname}.config")
25 |
--------------------------------------------------------------------------------
/scripts/zip_logdir.py:
--------------------------------------------------------------------------------
1 | # Description: Zip the logdir for easy sharing
2 | # Usage: python scripts/zip_logdir.py <logdir>
3 | import os
4 | import pdb
5 | import sys
6 |
7 | cwd = os.getcwd()
8 | if cwd not in sys.path:
9 | sys.path.insert(0, cwd)
10 |
11 | from preprocess.libs.io import run_bash_command
12 |
13 | logpath = sys.argv[1]
14 |
15 | logname = logpath.strip("/").split("/")[-1]
16 | print(logname)
17 |
18 | run_bash_command(f"zip log-{logname}.zip {logpath}/*")
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(
4 | name="lab4d",
5 | packages=find_packages(),
6 | )
7 |
--------------------------------------------------------------------------------