├── .github ├── ISSUE_TEMPLATE │ ├── bug-issue-report.md │ └── feature_request.md └── workflows │ └── static.yml ├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── CITATION.cff ├── LICENSE ├── README.md ├── browser ├── app.py └── templates │ └── index.html ├── database ├── mesh-templates │ └── cat-pikachu-remeshed.obj └── vid_data │ ├── ama-bouncing-4v.txt │ ├── ama-bouncing.txt │ ├── ama-samba-4v.txt │ ├── ama-samba.txt │ ├── car-turnaround-2.txt │ ├── car-turnaround.txt │ ├── cat-85.txt │ ├── cat-pikachu-0.txt │ ├── cat-pikachu.txt │ ├── dog-98.txt │ ├── dog-robolounge.txt │ ├── human-48.txt │ ├── human-cap.txt │ ├── room.txt │ ├── shiba-haru.txt │ ├── squirrel-baseball.txt │ └── squirrel.txt ├── docs ├── .gitignore ├── Makefile ├── README.md ├── env_min.yml ├── make.bat ├── source │ ├── _static │ │ ├── images │ │ │ ├── camera_annot.png │ │ │ └── visflo-00081.jpg │ │ ├── media_resized │ │ │ ├── car-turnaround-2-anno.mp4 │ │ │ ├── car-turnaround-2-proxy.mp4 │ │ │ ├── car-turnaround-2.mp4 │ │ │ ├── car-turnaround-2_collage.mp4 │ │ │ ├── car-turnaround_bev-120-xyz.mp4 │ │ │ ├── car-turnaround_bev-120.mp4 │ │ │ ├── car-turnaround_bev.mp4 │ │ │ ├── car-turnaround_ref-xyz.mp4 │ │ │ ├── car-turnaround_ref.mp4 │ │ │ ├── car-turnaround_turntable-120-xyz.mp4 │ │ │ ├── car-turnaround_turntable-120.mp4 │ │ │ ├── car-turnaround_turntable.mp4 │ │ │ ├── cat-85-80_ref-xyz.mp4 │ │ │ ├── cat-85.mp4 │ │ │ ├── cat-pikachu-0-comp_bev-xyz.mp4 │ │ │ ├── cat-pikachu-0-comp_bev.mp4 │ │ │ ├── cat-pikachu-0-proxy.mp4 │ │ │ ├── cat-pikachu-0.mp4 │ │ │ ├── cat-pikachu-0_collage.mp4 │ │ │ ├── cat-pikachu-0_ref-xyz.mp4 │ │ │ ├── cat-pikachu-0_ref.mp4 │ │ │ ├── cat-pikachu-0_turntable-xyz.mp4 │ │ │ ├── cat-pikachu-0_turntable.mp4 │ │ │ ├── cat-pikachu-7.mp4 │ │ │ ├── cat-pikachu-7_collage.mp4 │ │ │ ├── cat-pikachu-8_ref-xyz.mp4 │ │ │ ├── cat-pikachu-8_ref.mp4 │ │ │ ├── cat-pikachu-8_turntable-120-xyz.mp4 │ │ │ ├── cat-pikachu-8_turntable-120.mp4 │ │ │ ├── comp_elev.mp4 │ │ │ ├── dog-98-0_ref-xyz.mp4 │ │ │ ├── dog-98.mp4 │ │ │ ├── dog-dualrig-fgbg000-xyz.mp4 │ │ │ ├── dog-dualrig-fgbg000.mp4 │ │ │ ├── dog-robolounge_collage.mp4 │ │ │ ├── finch.mp4 │ │ │ ├── finch_collage.mp4 │ │ │ ├── finch_ref-xyz.mp4 │ │ │ ├── human-48-0_ref-xyz.mp4 │ │ │ ├── human-48-0_ref.mp4 │ │ │ ├── human-48-0_turntable-120-xyz.mp4 │ │ │ ├── human-48-0_turntable-120.mp4 │ │ │ ├── human-48-reanimate-8-xyz.mp4 │ │ │ ├── human-48-reanimate-8.mp4 │ │ │ ├── human-48.mp4 │ │ │ ├── human-cap-3-xyz.mp4 │ │ │ ├── human-cap-3.mp4 │ │ │ ├── human-cap-3_collage.mp4 │ │ │ ├── penguin-1-xyz.mp4 │ │ │ ├── penguin-2_collage.mp4 │ │ │ ├── penguin.mp4 │ │ │ ├── shiba-haru-6.mp4 │ │ │ ├── shiba-haru-7-xyz.mp4 │ │ │ ├── shiba-haru-7.mp4 │ │ │ ├── shiba-haru-7_collage.mp4 │ │ │ ├── squirrel-xyz.mp4 │ │ │ ├── squirrel.mp4 │ │ │ └── squirrel_collage.mp4 │ │ └── meshes │ │ │ ├── car-turnaround-2-canonical-prealign.obj │ │ │ ├── car-turnaround-2-canonical.obj │ │ │ ├── car-turnaround-2-mesh.obj │ │ │ ├── car-turnaround-2-proxy.obj │ │ │ ├── cat-pikachu-0-bone.obj │ │ │ ├── cat-pikachu-0-mesh.obj │ │ │ ├── cat-pikachu-0-proxy.obj │ │ │ ├── cat-pikachu-bone.obj │ │ │ ├── cat-pikachu-mesh.obj │ │ │ ├── cat-pikachu-proxy.obj │ │ │ ├── human-48-0-mesh-0000.obj │ │ │ ├── human-48-0-mesh.obj │ │ │ ├── human-48-bone.obj │ │ │ └── human-48-proxy.obj │ ├── api_docs │ │ ├── index.rst │ │ └── modules.rst │ ├── conf.py │ ├── data_models.rst │ ├── get_started │ │ └── index.rst │ ├── index.rst │ ├── obj2glb.py │ ├── qa.rst │ ├── resize_vids.py │ └── tutorials │ │ ├── 
arbitrary_video.rst │ │ ├── category_model.rst │ │ ├── index.rst │ │ ├── multi_video_cat.rst │ │ ├── preprocessing.rst │ │ └── single_video_cat.rst └── template │ ├── module.rst_t │ ├── package.rst_t │ └── toc.rst_t ├── environment.yml ├── lab4d ├── __init__.py ├── config.py ├── config_omega.py ├── dataloader │ ├── __init__.py │ ├── data_utils.py │ └── vidloader.py ├── engine │ ├── __init__.py │ ├── model.py │ ├── train_utils.py │ └── trainer.py ├── export.py ├── nnutils │ ├── __init__.py │ ├── appearance.py │ ├── base.py │ ├── deformable.py │ ├── embedding.py │ ├── feature.py │ ├── intrinsics.py │ ├── multifields.py │ ├── nerf.py │ ├── pose.py │ ├── skinning.py │ ├── time.py │ ├── visibility.py │ └── warping.py ├── reanimate.py ├── render.py ├── tests │ ├── hat_map.py │ ├── test_gpu_map.py │ ├── test_ops.py │ └── utils.py ├── third_party │ ├── nvp.py │ └── quaternion │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── add_gcc_cuda.sh │ │ ├── backend.py │ │ ├── mat3x3.py │ │ ├── quaternion.py │ │ ├── setup.py │ │ └── src │ │ ├── bindings.cpp │ │ ├── matinv.cu │ │ ├── matinv.h │ │ ├── quaternion.cu │ │ └── quaternion.h ├── train.py └── utils │ ├── __init__.py │ ├── cam_utils.py │ ├── camera_utils.py │ ├── decorator.py │ ├── geom_utils.py │ ├── gpu_utils.py │ ├── io.py │ ├── loss_utils.py │ ├── numpy_utils.py │ ├── profile_utils.py │ ├── quat_transform.py │ ├── render_utils.py │ ├── skel_utils.py │ ├── torch_utils.py │ ├── transforms.py │ └── vis_utils.py ├── media ├── logo.png └── teaser.gif ├── preprocess ├── __init__.py ├── libs │ ├── __init__.py │ ├── geometry.py │ ├── io.py │ ├── torch_models.py │ └── utils.py ├── scripts │ ├── __init__.py │ ├── camera_registration.py │ ├── canonical_registration.py │ ├── compute_diff.py │ ├── crop.py │ ├── depth.py │ ├── download.py │ ├── extract_dinov2.py │ ├── extract_frames.py │ ├── manual_cameras.py │ ├── tsdf_fusion.py │ └── write_config.py └── third_party │ ├── fusion.py │ ├── vcnplus │ ├── compute_flow.py │ ├── compute_flow.sh │ ├── flowutils │ │ ├── __init__.py │ │ └── flowlib.py │ ├── frame_filter.py │ └── models │ │ ├── VCNplus.py │ │ ├── __init__.py │ │ ├── conv4d.py │ │ ├── det.py │ │ ├── det_losses.py │ │ ├── det_utils.py │ │ ├── feature_extraction.py │ │ ├── inference.py │ │ ├── networks │ │ ├── .gitignore │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── DCN │ │ │ │ ├── __init__.py │ │ │ │ ├── dcn_v2.py │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.cpp │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ │ └── vision.h │ │ │ │ │ ├── dcn_v2.h │ │ │ │ │ └── vision.cpp │ │ │ │ ├── testcpu.py │ │ │ │ └── testcuda.py │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── make.sh │ │ │ └── setup.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ └── resnet_dcn.py │ │ └── submodule.py │ └── viewpoint │ ├── __init__.py │ ├── configs │ └── cse │ │ ├── Base-DensePose-RCNN-FPN-Human.yaml │ │ ├── Base-DensePose-RCNN-FPN.yaml │ │ ├── densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml │ │ └── densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml │ ├── cselib.py │ └── dp_viewpoint.py ├── scripts ├── create_collage.py ├── download_unzip.sh ├── install-deps.sh ├── render_intermediate.py ├── run_crop_all.py ├── run_preprocess.py ├── 
run_rendering_parallel.py ├── train.sh ├── zip_dataset.py └── zip_logdir.py └── setup.py /.github/ISSUE_TEMPLATE/bug-issue-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug/issue report 3 | about: Issues running the code / bug report to help us improve 4 | title: "[Bug/issue]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the issue is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | name: Build Sphinx docs and Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: false 16 | 17 | jobs: 18 | build-deploy: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v3 23 | with: 24 | submodules: recursive # Ensures submodules are checked out 25 | 26 | - name: Update Submodules 27 | run: | 28 | cd ./docs 29 | git submodule update --init --recursive 30 | 31 | - name: Setup Miniconda 32 | uses: conda-incubator/setup-miniconda@v2 33 | with: 34 | python-version: 3.9 35 | mamba-version: "*" 36 | channels: conda-forge,defaults 37 | channel-priority: true 38 | activate-environment: lab4d 39 | environment-file: docs/env_min.yml 40 | 41 | - name: Build Docs 42 | shell: bash -l {0} 43 | run: | 44 | conda activate lab4d 45 | cd ./docs 46 | sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/ 47 | python source/obj2glb.py 48 | make html 49 | 50 | - name: Setup Pages 51 | uses: actions/configure-pages@v3 52 | 53 | - name: Upload artifact 54 | uses: actions/upload-pages-artifact@v1 55 | with: 56 | # Upload the pages 57 | path: './docs/build/html' 58 | 59 | - name: Deploy to GitHub Pages 60 | id: deployment 61 | uses: actions/deploy-pages@v2 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | projects 2 | viewer 3 | run.sh 4 | run-long.sh 5 | /database/processed 6 | /database/configs 7 | /database/raw 8 | /logdir 9 | /tmp 10 | 11 | lab4d.egg-info 12 | __pycache__/ 13 | *.pth 14 | *.ckpt 15 | 16 | preprocess/third_party/vcnplus/vcn_rob.pth 17 | preprocess/third_party/viewpoint/human.pth 18 | preprocess/third_party/viewpoint/quad.pth 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "preprocess/third_party/detectron2"] 2 | path = preprocess/third_party/detectron2 3 | url = https://github.com/facebookresearch/detectron2 4 | ignore = dirty 5 | [submodule "preprocess/third_party/Track-Anything"] 6 | path = preprocess/third_party/Track-Anything 7 | url = https://github.com/gengshan-y/Track-Anything 8 | [submodule "docs/pytorch_sphinx_theme"] 9 | path = docs/pytorch_sphinx_theme 10 | url = https://github.com/gengshan-y/pytorch_sphinx_theme 11 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/*.npy": true, 4 | "**/*.jpg": true, 5 | "**/*.mp4": true, 6 | "**/.git/objects/**": true, 7 | "**/.git/subtree-cache/**": true, 8 | "**/node_modules/*/**": true, 9 | "**/*.log": true, 10 | "database/processed/**": true, 11 | "logdir/**": true, 12 | "tmp/**": true, 13 | }, 14 | "[python]": { 15 | "editor.defaultFormatter": "ms-python.black-formatter" 16 | }, 17 | "python.formatting.provider": "none", 18 | "liveServer.settings.root": "docs/build/" 19 | } 
-------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: Lab4d - A framework for in-the-wild 4D reconstruction from monocular videos 3 | message: 'If you use this software, please cite it as below.' 4 | type: software 5 | authors: 6 | - family-names: Yang 7 | given-names: Gengshan 8 | - family-names: Tan 9 | given-names: Jeff 10 | - family-names: Lyons 11 | given-names: Alex 12 | - family-names: Peri 13 | given-names: Neehar 14 | - family-names: Ramanan 15 | given-names: Deva 16 | url: 'https://github.com/lab4d-org/lab4d' 17 | license: MIT 18 | version: 0.0.0 19 | date-released: '2023-06-30' 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Gengshan Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | logo 4 | 5 |

6 | 7 | # Lab4D 8 | **[[Docs & Tutorials](https://lab4d-org.github.io/lab4d/)]** 9 | **[[Data & Checkpoints](https://lab4d-org.github.io/lab4d/data_models.html)]** 10 | 11 | *This is an alpha release and the APIs are subject to change. Please provide feedback and report bugs via github issues. Thank you for your support.* 12 | 13 | ## About 14 | **Lab4D** is a framework for 4D reconstruction from monocular videos. The software is licensed under the MIT license. 15 |

16 | 17 | logo 18 | 19 |

20 | 21 | 22 | ## TODOs 23 | - [ ] web viewer (see [PPR branch](https://github.com/gengshan-y/ppr)) 24 | - [ ] evaluation (see [PPR branch](https://github.com/gengshan-y/ppr)) and benchmarks 25 | - [ ] multi-view reconstruction 26 | - [ ] feedforward models (see [DASR](https://github.com/jefftan969/dasr)) 27 | 28 | ## Acknowledgement 29 | - Our pre-processing pipeline is built upon the following open-sourced repos: 30 | - Segmentation: [Track-Anything](https://github.com/gaomingqi/Track-Anything), [Grounding-DINO](https://github.com/IDEA-Research/GroundingDINO) 31 | - Feature & correspondence: [DensePose-CSE](https://github.com/facebookresearch/detectron2/blob/cbbc1ce26473cb2a5cc8f58e8ada9ae14cb41052/projects/DensePose/doc/DENSEPOSE_CSE.md), [DINOv2](https://github.com/facebookresearch/dinov2), [VCNPlus](https://github.com/gengshan-y/rigidmask) 32 | - Depth: [ZoeDepth](https://github.com/isl-org/ZoeDepth) 33 | - Camera: [BANMo-viewpoint](https://github.com/facebookresearch/banmo) 34 | - We use [dqtorch](https://github.com/MightyChaos/dqtorch) for efficient rotation operations 35 | - We thank [@mjlbach](https://github.com/mjlbach), [@alexanderbergman7](https://github.com/alexanderbergman7), and [@terrancewang](https://github.com/terrancewang) for testing and feedback 36 | - We thank [@jasonyzhang](https://github.com/jasonyzhang), [@MightyChaos](https://github.com/MightyChaos), [@JudyYe](https://github.com/JudyYe), and [@andrewsonga](https://github.com/andrewsonga) for feedback 37 | 38 | If you use this project for your research, please consider citing the following papers. 39 | 40 | For building deformable object models, cite: 41 |
42 | 43 | ``` 44 | @inproceedings{yang2022banmo, 45 | title={BANMo: Building Animatable 3D Neural Models from Many Casual Videos}, 46 | author={Yang, Gengshan and Vo, Minh and Neverova, Natalia and Ramanan, Deva and Vedaldi, Andrea and Joo, Hanbyul}, 47 | booktitle = {CVPR}, 48 | year={2022} 49 | } 50 | ``` 51 |
52 | 53 | For building category body and pose models, cite: 54 |
55 | 56 | ``` 57 | @inproceedings{yang2023rac, 58 | title={Reconstructing Animatable Categories from Videos}, 59 | author={Yang, Gengshan and Wang, Chaoyang and Reddy, N. Dinesh and Ramanan, Deva}, 60 | booktitle = {CVPR}, 61 | year={2023} 62 | } 63 | ``` 64 |
65 | 66 | For object-scene reconstruction and extreme view synthesis, cite: 67 |
68 | 69 | ``` 70 | @article{song2023totalrecon, 71 | title={Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis}, 72 | author={Song, Chonghyuk and Yang, Gengshan and Deng, Kangle and Zhu, Jun-Yan and Ramanan, Deva}, 73 | journal={arXiv}, 74 | year={2023} 75 | } 76 | ``` 77 |
78 | 79 | For training feed-forward video/image shape and pose estimators, cite: 80 |
81 | 82 | ``` 83 | @inproceedings{tan2023distilling, 84 | title={Distilling Neural Fields for Real-Time Articulated Shape Reconstruction}, 85 | author={Tan, Jeff and Yang, Gengshan and Ramanan, Deva}, 86 | booktitle={CVPR}, 87 | year={2023} 88 | } 89 | ``` 90 |
91 | 92 | For the human-48 dataset, cite: 93 | 94 |
95 | 96 | ``` 97 | @incollection{vlasic2008articulated, 98 | title={Articulated mesh animation from multi-view silhouettes}, 99 | author={Vlasic, Daniel and Baran, Ilya and Matusik, Wojciech and Popovi{\'c}, Jovan}, 100 | booktitle={Acm Siggraph 2008 papers}, 101 | pages={1--9}, 102 | year={2008} 103 | } 104 | @article{xu2018monoperfcap, 105 | title={Monoperfcap: Human performance capture from monocular video}, 106 | author={Xu, Weipeng and Chatterjee, Avishek and Zollh{\"o}fer, Michael and Rhodin, Helge and Mehta, Dushyant and Seidel, Hans-Peter and Theobalt, Christian}, 107 | journal={ACM Transactions on Graphics (ToG)}, 108 | volume={37}, 109 | number={2}, 110 | pages={1--15}, 111 | year={2018}, 112 | publisher={ACM New York, NY, USA} 113 | } 114 | @inproceedings{perazzi2016benchmark, 115 | title={A benchmark dataset and evaluation methodology for video object segmentation}, 116 | author={Perazzi, Federico and Pont-Tuset, Jordi and McWilliams, Brian and Van Gool, Luc and Gross, Markus and Sorkine-Hornung, Alexander}, 117 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, 118 | pages={724--732}, 119 | year={2016} 120 | } 121 | ``` 122 |
123 | -------------------------------------------------------------------------------- /browser/app.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # python browser/app.py 'database/processed/Annotations/Full-Resolution/cat-85-*/vis.mp4' 3 | # python browser/app.py 'logdir/dog-98-category-comp/renderings_00*/xyz.mp4' 4 | # or python browser/app.py and type in string 5 | from flask import Flask, render_template, request, send_from_directory 6 | import os 7 | import sys 8 | import glob 9 | 10 | app = Flask(__name__) 11 | 12 | 13 | def get_files(path): 14 | matched_files = sorted(glob.glob(path)) 15 | return matched_files 16 | 17 | 18 | @app.route("/", methods=["GET", "POST"]) 19 | def index(): 20 | files = [] 21 | if request.method == "POST": 22 | path = request.form.get("path") 23 | 24 | elif len(sys.argv) > 1: 25 | path = sys.argv[1] 26 | else: 27 | path = "" 28 | files = get_files(path) 29 | return render_template("index.html", files=files) 30 | 31 | 32 | @app.route("/logdir/", methods=["GET"]) 33 | def get_logdir_file(filename): 34 | return send_from_directory(os.getcwd(), filename) 35 | 36 | 37 | @app.route("/database/", methods=["GET"]) 38 | def get_database_file(filename): 39 | return send_from_directory(os.getcwd(), filename) 40 | 41 | 42 | if __name__ == "__main__": 43 | app.run(debug=True) 44 | -------------------------------------------------------------------------------- /browser/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Data Browser 6 | 73 | 74 | 75 | 76 |

Data Browser

77 |
78 |
79 | (e.g., 80 | database/processed/Annotations/Full-Resolution/cat*/vis.mp4) 81 |
82 | 83 |
84 |

Matched Files

85 | 100 | 101 | 102 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /database/vid_data/ama-bouncing-4v.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/8b1krs9hcyvk0z0/AAAG5wle5F98KERiDHUJilUMa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-bouncing.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/9a90bd0p6hacqiv/AADFZOFpdsFzpGiPQqvvCsVDa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-samba-4v.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/4b3dx6id0ncoyhe/AAAnKtpH8wirj0sazkdZCbEMa?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/ama-samba.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/uox4amcyd9g2gm9/AAA8XECVaSjqpgEgwTzIxc5da?dl=0 2 | -------------------------------------------------------------------------------- /database/vid_data/car-turnaround-2.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/l7klay8bg54ryb8/AACeEmZq4aj6RXYUdY-UaZsua 2 | -------------------------------------------------------------------------------- /database/vid_data/car-turnaround.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/2saroz7jrnp1cy6/AACGHva9pJAIwQ6k8qgMs5Nma 2 | -------------------------------------------------------------------------------- /database/vid_data/cat-85.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/38f29ro8aq85enk/AAA5aSgBi4otuPrEiZRm1Ih5a 2 | -------------------------------------------------------------------------------- /database/vid_data/cat-pikachu-0.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/ls19bz5uo8juzoa/AAB0x4GUeH5PO97sB8Nak9eIa -------------------------------------------------------------------------------- /database/vid_data/cat-pikachu.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/bcm9150d3sy68ve/AADbQVnHFbBTvsfJLoa9AM9Ba -------------------------------------------------------------------------------- /database/vid_data/dog-98.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/85567m06uxoni42/AAASa1OnsXM2u8cxEiQSSF_Ia 2 | -------------------------------------------------------------------------------- /database/vid_data/dog-robolounge.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/66a2f1cfudj6ep3/AAAbJE0mzMMQdLruPnO16r8la 2 | -------------------------------------------------------------------------------- /database/vid_data/human-48.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/kv4xcntzcwfnmcv/AABqMAvjoTJw4U_8puObKBD9a 2 | -------------------------------------------------------------------------------- /database/vid_data/human-cap.txt: 
-------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/rl351jmtw9v5107/AACYWpO9M453NJr8ACViIeLfa 2 | -------------------------------------------------------------------------------- /database/vid_data/room.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/hkojyikow9jcd0g/AACA5-U75SQycUYbbx8bDdlUa 2 | -------------------------------------------------------------------------------- /database/vid_data/shiba-haru.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/u3j1ps2bcyubvs1/AACOrE2DiK-O2l74Q5Y4SlNQa 2 | -------------------------------------------------------------------------------- /database/vid_data/squirrel-baseball.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/wur870xkv01hv32/AADsB0zeCGWyUy4czQX5jCMCa 2 | -------------------------------------------------------------------------------- /database/vid_data/squirrel.txt: -------------------------------------------------------------------------------- 1 | https://www.dropbox.com/sh/1ktr3qnqwdysyvi/AAAhIRpzWB58KmCJvXu4agd_a 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /source/api_docs/lab4d* 3 | /source/_static/meshes/*.glb 4 | /source/_static/media/* -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | To develop locally, start Live Server and forward the port to your local browser 2 | 3 | To generate the necessary files: 4 | ``` 5 | sphinx-apidoc -o source/api_docs ../lab4d/ -f --templatedir template/ 6 | python source/obj2glb.py 7 | ``` 8 | 9 | To rebuild the webpage: 10 | ```make clean; make html; mv build/html build/lab4d``` -------------------------------------------------------------------------------- /docs/env_min.yml: -------------------------------------------------------------------------------- 1 | name: lab4d 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - pip 7 | - ninja 8 | - pytorch 9 | - torchvision 10 | - cpuonly 11 | - matplotlib 12 | - absl-py 13 | - tensorboard 14 | - trimesh 15 | - scikit-image 16 | - opencv 17 | - einops 18 | - scikit-learn 19 | - imageio=2.14.1 20 | - imageio-ffmpeg 21 | - pip: 22 | - pysdf 23 | - sphinx==6.2.1 24 | - sphinx-copybutton 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/images/camera_annot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/camera_annot.png -------------------------------------------------------------------------------- /docs/source/_static/images/visflo-00081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/images/visflo-00081.jpg -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2-anno.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-anno.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2-proxy.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2-proxy.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround-2_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround-2_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_bev.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_bev.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/car-turnaround_turntable.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/car-turnaround_turntable.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85-80_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-85.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-85.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-comp_bev.mp4 
-------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0-proxy.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-0_turntable.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-7_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-7_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/cat-pikachu-8_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/comp_elev.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/comp_elev.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-98.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-98.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-dualrig-fgbg000.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/dog-robolounge_collage.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/dog-robolounge_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/finch_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/finch_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_ref-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_ref.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_ref.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-0_turntable-120.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-0_turntable-120.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-48-reanimate-8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48-reanimate-8.mp4 -------------------------------------------------------------------------------- 
/docs/source/_static/media_resized/human-48.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-48.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/human-cap-3_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/human-cap-3_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin-1-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-1-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin-2_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin-2_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/penguin.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/penguin.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-6.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7.mp4 
-------------------------------------------------------------------------------- /docs/source/_static/media_resized/shiba-haru-7_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/shiba-haru-7_collage.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel-xyz.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel-xyz.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel.mp4 -------------------------------------------------------------------------------- /docs/source/_static/media_resized/squirrel_collage.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/docs/source/_static/media_resized/squirrel_collage.mp4 -------------------------------------------------------------------------------- /docs/source/api_docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Lab4D documentation master file, created by 2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Lab4D's documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | 12 | lab4d.dataloader 13 | lab4d.engine 14 | lab4d.nnutils 15 | lab4d.utils 16 | 17 | .. Indices and tables 18 | .. ================== 19 | 20 | .. * :ref:`genindex` 21 | .. * :ref:`modindex` 22 | .. * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/source/api_docs/modules.rst: -------------------------------------------------------------------------------- 1 | lab4d 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | lab4d 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "Lab4D" 10 | copyright = "2023, Gengshan Yang, Jeff Tan, Alex Lyons, Neehar Peri, Deva Ramanan, Carnegie Mellon University" 11 | release = "0.0.0" 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | import sys, os 17 | 18 | # Path to lab4d 19 | sys.path.insert( 20 | 0, 21 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 22 | ) 23 | 24 | # Allow auto-generated docs from Google format docstrings 25 | extensions = [ 26 | "sphinx.ext.autodoc", 27 | "sphinx.ext.napoleon", 28 | "sphinx.ext.intersphinx", 29 | "sphinx_copybutton", 30 | ] 31 | 32 | # other packages 33 | intersphinx_mapping = { 34 | "python": ("https://docs.python.org/3", None), 35 | "pytorch": ("https://pytorch.org/docs/stable/", None), 36 | } 37 | 38 | # Allow documentation of multiple return types 39 | napoleon_custom_sections = [("Returns", "params_style")] 40 | 41 | templates_path = ["_templates"] 42 | exclude_patterns = [] 43 | 44 | # Mocking the imports of modules that require cuda 45 | autodoc_mock_imports = ["_quaternion"] 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 49 | 50 | html_theme = "pytorch_sphinx_theme" 51 | html_theme_path = ["../pytorch_sphinx_theme"] 52 | html_static_path = ["_static"] 53 | -------------------------------------------------------------------------------- /docs/source/get_started/index.rst: -------------------------------------------------------------------------------- 1 | Get Started 2 | =================== 3 | 4 | Requirements 5 | ------------------------- 6 | 7 | - **Linux** machine with at least 1 GPU (we tested on 3090s) 8 | - **Conda** 9 | 10 | - Follow `this link `_ to install conda. 11 | 12 | - Recommended: use mamba for package management (more efficient than conda). Install mamba with:: 13 | 14 | conda install -c conda-forge mamba -y 15 | 16 | - For developers: use `VS Code `_ with Black Formatter. 17 | 18 | Set up the environment 19 | ------------------------- 20 | 21 | Clone the repository and create a conda environment with the required packages:: 22 | 23 | git clone git@github.com:lab4d-org/lab4d.git --recursive 24 | 25 | cd lab4d 26 | 27 | mamba env create -f environment.yml 28 | 29 | conda activate lab4d 30 | 31 | bash scripts/install-deps.sh 32 | 33 | 34 | Running the Tutorial Code 35 | --------------------------------------------- 36 | See the `Tutorials page `_. 37 | 38 | 39 | .. .. Lab4D documentation master file, created by 40 | .. sphinx-quickstart on Fri Jun 2 20:54:08 2023. 41 | .. You can adapt this file completely to your liking, but it should at least 42 | .. contain the root `toctree` directive. 43 | 44 | .. Welcome to Lab4D's DOCUMENTATION! 45 | .. ================================= 46 | 47 | .. .. toctree:: 48 | .. :maxdepth: 2 49 | 50 | .. get_started 51 | 52 | .. .. Indices and tables 53 | .. .. ================== 54 | 55 | .. .. * :ref:`genindex` 56 | .. .. * :ref:`modindex` 57 | .. ..
* :ref:`search` 58 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Lab4D documentation master file, created by 2 | sphinx-quickstart on Fri Jun 2 20:54:08 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Lab4D's documentation! 7 | ================================= 8 | 9 | **Lab4D** is a framework for 4D reconstruction from monocular videos. 10 | 11 | Features 12 | ------------------------------- 13 | - Representation 14 | 15 | - neural implicit representation 16 | 17 | - deformation fields (neural fields, control-points, skeleton) 18 | 19 | - compositional scene 20 | 21 | - category-level models 22 | 23 | - Interface for priors 24 | 25 | - pixelwise priors: depth, flow, DINOv2 features 26 | 27 | - segmentation: track-anything, video instance segmentation 28 | 29 | - camera viewpoint: viewpoint network, manual annotation 30 | 31 | - Efficiency 32 | 33 | - multi-gpu training 34 | 35 | - dual-quaternion ops 36 | 37 | .. note:: 38 | 39 | This is an alpha release and the APIs are subject to change as we continuously improve and refine the framework. 40 | We encourage users to provide feedback and report bugs via `github issues `_. 41 | Thank you for your support. 42 | -------------------------------------------------------------------------------- /docs/source/obj2glb.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import numpy as np 3 | import glob 4 | import os 5 | 6 | rootdir = os.path.dirname(__file__) 7 | 8 | for path in glob.glob("%s/_static/meshes/*.obj" % rootdir): 9 | print(path) 10 | m = trimesh.load(path, process=False) 11 | # cv coordinate to gl coordinate 12 | m.vertices = np.stack( 13 | [m.vertices[:, 0], -m.vertices[:, 1], -m.vertices[:, 2]], axis=1 14 | ) 15 | m.export(path.replace(".obj", ".glb")) 16 | -------------------------------------------------------------------------------- /docs/source/qa.rst: -------------------------------------------------------------------------------- 1 | Q&A 2 | =========================== 3 | 4 | Installation 5 | --------------------------- 6 | - Conda/mamba is not able to resolve conflicts when installing packages. 7 | 8 | - Possible cause: The base conda environment is not clean. See the discussion `in this thread `_. 9 | 10 | - Fix: Remove the packages in the base environment that cause the conflict. 11 | 12 | Data pre-processing 13 | --------------------------- 14 | - My Gradio app gets stuck at the loading screen. 15 | 16 | - Potential fix: kill the running VS Code processes and re-run the preprocessing code. 17 | 18 | Model training 19 | --------------------------- 20 | 21 | - How to change hyperparameters when using more videos (or video frames)? 22 | 23 | - You want to increase `pixels_per_image` and `imgs_per_gpu`, and use more GPUs. 24 | The number of sampled rays/pixels per minibatch is computed as the number of GPUs x `imgs_per_gpu` x `pixels_per_image`. 25 | Also see the note `here `__. 26 | 27 | - Training on >50 videos might cause the following OS error:: 28 | 29 | [Errno 24] Too many open files 30 | 31 | - To check the current file limit, run:: 32 | 33 | ulimit -S -n 34 | 35 | To increase the open file limit to 4096, run:: 36 | 37 | ulimit -n 4096 38 | 39 | - Multi-GPU training hangs but single-GPU training works fine.
40 | 
41 |   - Run the training script with `NCCL_P2P_DISABLE=1 bash scripts/train.sh ...` to disable direct GPU-to-GPU (P2P) communication. See discussion `here `__.
42 | 
--------------------------------------------------------------------------------
/docs/source/resize_vids.py:
--------------------------------------------------------------------------------
 1 | # python source/resize_vids.py
 2 | import os
 3 | import numpy as np
 4 | import imageio
 5 | from PIL import Image
 6 | 
 7 | src_dir = "source/_static/media"
 8 | dst_dir = "source/_static/media_resized/"
 9 | max_dim = 640 * 640
10 | video_exts = [".mp4", ".avi", ".mov", ".flv", ".mkv", ".wmv"]
11 | 
12 | # check for the destination directory and create it if it doesn't exist
13 | if not os.path.exists(dst_dir):
14 |     os.makedirs(dst_dir)
15 | 
16 | # iterate over video files in the source directory
17 | for filename in os.listdir(src_dir):
18 |     # check if the file is a video, ignoring the case of the extension
19 |     if any(filename.lower().endswith(ext) for ext in video_exts):
20 |         # add other conditions if there are other video formats
21 |         src_filepath = os.path.join(src_dir, filename)
22 |         dst_filepath = os.path.splitext(filename)[0] + ".mp4"
23 |         dst_filepath = os.path.join(dst_dir, dst_filepath)
24 | 
25 |         reader = imageio.get_reader(src_filepath)
26 |         fps = reader.get_meta_data()["fps"]
27 | 
28 |         # obtain video dimensions
29 |         first_frame = reader.get_data(0)
30 |         orig_height, orig_width = first_frame.shape[:2]
31 | 
32 |         # check if the resolution is greater than 640x640
33 |         if orig_height * orig_width > max_dim:
34 |             print("Resizing video: " + filename)
35 |             # resize while maintaining the aspect ratio
36 |             ratio = np.sqrt(max_dim / (orig_height * orig_width))
37 |             new_width = int(orig_width * ratio)
38 |             new_height = int(orig_height * ratio)
39 | 
40 |             writer = imageio.get_writer(dst_filepath, fps=fps)
41 | 
42 |             # iterate over frames in the video
43 |             for i, frame in enumerate(reader):
44 |                 frame = Image.fromarray(frame)
45 |                 frame = frame.resize((new_width, new_height), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
46 |                 writer.append_data(np.array(frame))
47 | 
48 |             writer.close()
49 |         else:
50 |             # copy the video to the destination directory
51 |             print("Copying video: " + filename)
52 |             os.system("cp " + src_filepath + " " + dst_filepath)
53 | 
54 | print("Video resizing is complete!")
55 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/index.rst:
--------------------------------------------------------------------------------
 1 | .. Lab4D documentation master file, created by
 2 |    sphinx-quickstart on Fri Jun  2 20:54:08 2023.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Tutorials
 7 | =================================
 8 | 
 9 | Overview
10 | ---------------------------------
11 | Inferring 4D representations given 2D observations is challenging due to its under-constrained nature.
12 | With recent advances in differentiable rendering, visual correspondence, and segmentation, we build an optimization framework that
13 | reconstructs dense 4D structures via test-time optimization, by minimizing the difference between the rendered 2D images and the input observations.
14 | 
15 | The tutorials introduce a complete workflow of Lab4D. We'll use the methods and datasets from the following papers:
16 | 
17 | - `BANMo: Building Animatable 3D Neural Models from Many Casual Videos `_, CVPR 2022.
18 | - `RAC: Reconstructing Animatable Categories from Videos `_, CVPR 2023.
19 | - `Total-Recon: Deformable Scene Reconstruction for Embodied View Synthesis `_, ICCV 2023.
20 | 
21 | `The tutorials assume a basic familiarity with Python and differentiable rendering concepts.`
22 | 
23 | Each tutorial can be executed in one of the following ways:
24 | 
25 | - **Custom videos**: This option allows you to train a model on your own videos.
26 | - **Preprocessed data**: This option skips the preprocessing step and trains models on the `preprocessed data `_ we provide.
27 | - **Render-only**: This option skips model training and allows you to render the `pre-trained model weights `_ we provide.
28 | 
29 | 
30 | Content
31 | ---------------------------------
32 | .. toctree::
33 |    :maxdepth: 1
34 | 
35 |    arbitrary_video
36 |    single_video_cat
37 |    multi_video_cat
38 |    category_model
39 |    preprocessing
40 | 
41 | .. Indices and tables
42 | .. ==================
43 | 
44 | .. * :ref:`genindex`
45 | .. * :ref:`modindex`
46 | .. * :ref:`search`
47 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/preprocessing.rst:
--------------------------------------------------------------------------------
 1 | 5. Pre-process custom videos
 2 | ========================================
 3 | 
 4 | In this tutorial, we show how to preprocess custom videos so they can later be used for training. We provide some
 5 | `raw videos `_ for you to try out.
 6 | The download links are provided as `database/vid_data/$seqname`, where `$seqname` is the name of the sequence.
 7 | 
 8 | Taking `cat-pikachu-0` from the `second tutorial `_ as an example,
 9 | run the following to download and process the data::
10 | 
11 |   # Args: sequence name, text prompt (segmentation), category from {human, quad, other} (camera viewpoint), gpu id
12 |   python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
13 | 
14 | .. note::
15 |    To preprocess other videos, create a folder named `database/raw/$seqname`, move the videos into it, and run the above command.
16 | 
17 | `Next, we will get into the details of processing.`
18 | 
19 | Frame filtering
20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21 | By default, we first remove near-static frames (i.e., frames without motion or with only small motion), since they do not provide useful extra signal for reconstruction.
22 | To do so, we run optical flow over consecutive frames and skip a frame if the median flow magnitude is smaller than a threshold.
23 | 
24 | .. note::
25 |    There is a flag in `scripts/run_preprocess.py` that turns frame filtering on or off. A minimal sketch of the skip rule is shown after the Segmentation section below.
26 | 
27 | Segmentation
28 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
29 | 
30 | We provide a web GUI and a command line interface for object segmentation.
31 | 
32 | **Interactive segmentation**: `Track-Anything `_ will be used given the text prompt "other", e.g.,::
33 | 
34 |   python scripts/run_preprocess.py cat-pikachu-0 other quad "0"
35 | 
36 | It launches a web interface and asks the user to specify point prompts on the object of interest.
37 | 
38 | 
39 | **Automatic segmentation**: `Grounding-DINO `_ will be used to determine which object to track
40 | in the first frame given a valid text prompt, e.g.,::
41 | 
42 |   python scripts/run_preprocess.py cat-pikachu-0 cat quad "0"
43 | 
44 | 
45 | .. note::
46 | 
47 |    There is a flag in `scripts/run_preprocess.py` that switches the segmentation method.
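As a minimal sketch of the frame-filtering rule described above (the function name and the
threshold value are illustrative; the real logic is part of the preprocessing scripts under
`preprocess/third_party/vcnplus/`)::

   import numpy as np

   def keep_frame(flow_uv, threshold=1.0):
       # flow_uv: (H, W, 2) optical flow to the previously kept frame, in pixels
       magnitude = np.linalg.norm(flow_uv, axis=-1)
       # skip near-static frames: keep the frame only if the median flow is large enough
       return np.median(magnitude) >= threshold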
48 | 49 | 50 | Object-to-camera transformations 51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 52 | 53 | For human and quadruped animals, we use a viewpoint network (presented in BANMo) to estimate the camera viewpoint / rotation with regard to a canonical 3D coordinate. 54 | 55 | For other categories, user will be asked to annotate camera viewpoints (by aligning the orientation of a reference 3D model to the input image) for a few frames as shown below. 56 | 57 | .. raw:: html 58 | 59 |
60 |    <!-- embedded demo of the camera viewpoint annotation interface -->
61 | 
62 | 63 | .. note:: 64 | 65 | To align the 3D model with the provided image, utilize the sidebar to specify the camera's roll, elevation, and azimuth angles. After adjusting each frame, ensure you click 'save.' Once you've completed adjustments for all the videos, click 'exit.' 66 | We suggest making an annotation every time the object turns 90 degrees, such as when it changes from a front-facing position to facing left. 67 | 68 | In the `scripts/run_preprocess.py` file, there's a flag that allows you to change the method used for camera estimation." 69 | 70 | After getting the sparse annotations, we run camera registration that propogates the rotation annotations using optical flow and monocular depth. 71 | Camera translations are approximated with 2D object center and size (from segmentation) assuming a orthographic camera model. 72 | 73 | 74 | Parallelizing the pre-processing 75 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 76 | 77 | Preprocessing 10 videos takes about 90 minutes on a single device. To speed up the pre-processing, 78 | we can parallelize tasks over multiple gpus with the following:: 79 | 80 | # Args: sequence name, text prompt for segmentation, category from {human, quad, other} for camera viewpoint, gpu id 81 | python scripts/run_preprocess.py cat-pikachu animal quad "0,1,2,3" 82 | 83 | 84 | Visit other `tutorials `_. -------------------------------------------------------------------------------- /docs/template/module.rst_t: -------------------------------------------------------------------------------- 1 | {%- if show_headings %} 2 | {{- [basename, "module"] | join(' ') | e | heading }} 3 | 4 | {% endif -%} 5 | .. automodule:: {{ qualname }} 6 | {%- for option in automodule_options %} 7 | :{{ option }}: 8 | {%- endfor %} 9 | 10 | -------------------------------------------------------------------------------- /docs/template/package.rst_t: -------------------------------------------------------------------------------- 1 | {%- macro automodule(modname, options) -%} 2 | .. automodule:: {{ modname }} 3 | {%- for option in options %} 4 | :{{ option }}: 5 | {%- endfor %} 6 | {%- endmacro %} 7 | 8 | {%- macro toctree(docnames) -%} 9 | .. toctree:: 10 | :maxdepth: {{ maxdepth }} 11 | {% for docname in docnames %} 12 | {{ docname }} 13 | {%- endfor %} 14 | {%- endmacro %} 15 | 16 | {%- if is_namespace %} 17 | {{- [pkgname, "namespace"] | join(" ") | e | heading }} 18 | {% else %} 19 | {{- [pkgname, "package"] | join(" ") | e | heading }} 20 | {% endif %} 21 | 22 | {%- if is_namespace %} 23 | .. py:module:: {{ pkgname }} 24 | {% endif %} 25 | 26 | {%- if modulefirst and not is_namespace %} 27 | {{ automodule(pkgname, automodule_options) }} 28 | {% endif %} 29 | 30 | {%- if subpackages %} 31 | Subpackages 32 | ----------- 33 | 34 | {{ toctree(subpackages) }} 35 | {% endif %} 36 | 37 | {%- if submodules %} 38 | {% if separatemodules %} 39 | {{ toctree(submodules) }} 40 | {% else %} 41 | {%- for submodule in submodules %} 42 | {% if show_headings %} 43 | {{- submodule | e | heading(2) }} 44 | {% endif %} 45 | {{ automodule(submodule, automodule_options) }} 46 | {% endfor %} 47 | {%- endif %} 48 | {%- endif %} -------------------------------------------------------------------------------- /docs/template/toc.rst_t: -------------------------------------------------------------------------------- 1 | {{ header | heading }} 2 | 3 | .. 
toctree:: 4 | :maxdepth: {{ maxdepth }} 5 | {% for docname in docnames %} 6 | {{ docname }} 7 | {%- endfor %} 8 | 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: lab4d 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - python=3.9 8 | - setuptools=66.0.0 9 | - pip 10 | - pytorch==2.0.0=py3.9_cuda11.7_cudnn8.5.0_0 11 | - torchvision 12 | - cudatoolkit-dev=11.7 13 | - gcc_linux-64=10 14 | - gxx_linux-64=10 15 | - matplotlib 16 | - ninja 17 | - absl-py 18 | - tensorboard 19 | - trimesh 20 | - scikit-image 21 | - opencv 22 | - einops 23 | - numba 24 | - gdown 25 | - scikit-learn 26 | - psutil 27 | - av 28 | - plotly 29 | - imageio 30 | - imageio-ffmpeg 31 | - tqdm 32 | - pip: 33 | - pysdf 34 | - gradio==3.49.0 35 | - timm==0.6.7 36 | - detectron2 @ git+https://github.com/facebookresearch/detectron2.git@e9f7e2b 37 | - segment_anything @ git+https://github.com/facebookresearch/segment-anything.git 38 | - groundingdino @ git+https://github.com/IDEA-Research/GroundingDINO.git 39 | - openmim 40 | - pyrender 41 | -------------------------------------------------------------------------------- /lab4d/__init__.py: -------------------------------------------------------------------------------- 1 | # Decorate all modules with @record_function and @record_class 2 | import lab4d.dataloader.data_utils 3 | import lab4d.dataloader.vidloader 4 | import lab4d.engine.model 5 | import lab4d.engine.train_utils 6 | import lab4d.engine.trainer 7 | import lab4d.nnutils.appearance 8 | import lab4d.nnutils.base 9 | import lab4d.nnutils.deformable 10 | import lab4d.nnutils.embedding 11 | import lab4d.nnutils.feature 12 | import lab4d.nnutils.intrinsics 13 | import lab4d.nnutils.multifields 14 | import lab4d.nnutils.nerf 15 | import lab4d.nnutils.pose 16 | import lab4d.nnutils.skinning 17 | import lab4d.nnutils.time 18 | import lab4d.nnutils.visibility 19 | import lab4d.nnutils.warping 20 | import lab4d.utils.cam_utils 21 | import lab4d.utils.camera_utils 22 | import lab4d.utils.geom_utils 23 | import lab4d.utils.io 24 | import lab4d.utils.loss_utils 25 | import lab4d.utils.numpy_utils 26 | import lab4d.utils.quat_transform 27 | import lab4d.utils.render_utils 28 | import lab4d.utils.skel_utils 29 | import lab4d.utils.torch_utils 30 | import lab4d.utils.transforms 31 | import lab4d.utils.vis_utils 32 | from lab4d.utils.profile_utils import decorate_module 33 | 34 | decorate_module(lab4d.dataloader.data_utils) 35 | decorate_module(lab4d.dataloader.vidloader) 36 | decorate_module(lab4d.engine.model) 37 | decorate_module(lab4d.engine.trainer) 38 | decorate_module(lab4d.engine.train_utils) 39 | decorate_module(lab4d.nnutils.appearance) 40 | decorate_module(lab4d.nnutils.base) 41 | decorate_module(lab4d.nnutils.deformable) 42 | decorate_module(lab4d.nnutils.embedding) 43 | decorate_module(lab4d.nnutils.feature) 44 | decorate_module(lab4d.nnutils.intrinsics) 45 | decorate_module(lab4d.nnutils.multifields) 46 | decorate_module(lab4d.nnutils.nerf) 47 | decorate_module(lab4d.nnutils.pose) 48 | decorate_module(lab4d.nnutils.skinning) 49 | decorate_module(lab4d.nnutils.time) 50 | decorate_module(lab4d.nnutils.visibility) 51 | decorate_module(lab4d.nnutils.warping) 52 | decorate_module(lab4d.utils.camera_utils) 53 | decorate_module(lab4d.utils.cam_utils) 54 | decorate_module(lab4d.utils.geom_utils) 55 | decorate_module(lab4d.utils.io) 56 | 
decorate_module(lab4d.utils.loss_utils) 57 | decorate_module(lab4d.utils.numpy_utils) 58 | decorate_module(lab4d.utils.quat_transform) 59 | decorate_module(lab4d.utils.render_utils) 60 | decorate_module(lab4d.utils.skel_utils) 61 | decorate_module(lab4d.utils.torch_utils) 62 | decorate_module(lab4d.utils.transforms) 63 | decorate_module(lab4d.utils.vis_utils) 64 | -------------------------------------------------------------------------------- /lab4d/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import os 3 | 4 | from absl import flags 5 | 6 | opts = flags.FLAGS 7 | 8 | 9 | class TrainModelConfig: 10 | # weights of reconstruction terms 11 | flags.DEFINE_float("mask_wt", 0.1, "weight for silhouette loss") 12 | flags.DEFINE_float("rgb_wt", 0.1, "weight for color loss") 13 | flags.DEFINE_float("depth_wt", 1e-4, "weight for depth loss") 14 | flags.DEFINE_float("flow_wt", 0.5, "weight for flow loss") 15 | flags.DEFINE_float("vis_wt", 1e-2, "weight for visibility loss") 16 | flags.DEFINE_float("feature_wt", 1e-2, "weight for feature reconstruction loss") 17 | flags.DEFINE_float("feat_reproj_wt", 5e-2, "weight for feature reprojection loss") 18 | 19 | # weights of regularization terms 20 | flags.DEFINE_float( 21 | "reg_visibility_wt", 1e-4, "weight for visibility regularization" 22 | ) 23 | flags.DEFINE_float("reg_eikonal_wt", 1e-3, "weight for eikonal regularization") 24 | flags.DEFINE_float( 25 | "reg_deform_cyc_wt", 0.01, "weight for deform cyc regularization" 26 | ) 27 | flags.DEFINE_float("reg_delta_skin_wt", 5e-3, "weight for delta skinning reg") 28 | flags.DEFINE_float("reg_skin_entropy_wt", 5e-4, "weight for delta skinning reg") 29 | flags.DEFINE_float( 30 | "reg_gauss_skin_wt", 1e-3, "weight for gauss skinning consistency" 31 | ) 32 | flags.DEFINE_float("reg_cam_prior_wt", 0.1, "weight for camera regularization") 33 | flags.DEFINE_float("reg_skel_prior_wt", 0.1, "weight for skeleton regularization") 34 | flags.DEFINE_float( 35 | "reg_gauss_mask_wt", 0.01, "weight for gauss mask regularization" 36 | ) 37 | flags.DEFINE_float("reg_soft_deform_wt", 100.0, "weight for soft deformation reg") 38 | 39 | # model 40 | flags.DEFINE_string("field_type", "fg", "{bg, fg, comp}") 41 | flags.DEFINE_string( 42 | "fg_motion", "rigid", "{rigid, dense, bob, skel-human, skel-quad}" 43 | ) 44 | flags.DEFINE_bool("single_inst", True, "assume the same morphology over objs") 45 | 46 | 47 | class TrainOptConfig: 48 | # io-related 49 | flags.DEFINE_string("seqname", "cat", "name of the sequence") 50 | flags.DEFINE_string("logname", "tmp", "name of the saved log") 51 | flags.DEFINE_string( 52 | "data_prefix", "crop", "prefix of the data entries, {crop, full}" 53 | ) 54 | flags.DEFINE_integer("train_res", 256, "size of training images") 55 | flags.DEFINE_string("logroot", "logdir/", "root directory for log files") 56 | flags.DEFINE_string("load_suffix", "", "sufix of params, {latest, 0, 10, ...}") 57 | flags.DEFINE_string("feature_type", "dinov2", "{dinov2, cse}") 58 | flags.DEFINE_string("load_path", "", "path to load pretrained model") 59 | 60 | # accuracy-related 61 | flags.DEFINE_float("learning_rate", 5e-4, "learning rate") 62 | flags.DEFINE_integer("num_rounds", 20, "number of rounds to train") 63 | flags.DEFINE_integer("iters_per_round", 200, "number of iterations per round") 64 | flags.DEFINE_integer("imgs_per_gpu", 128, "images samples per iter, per gpu") 65 | 
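    # Note: the number of rays / pixels sampled per optimization step is
    # ngpu x imgs_per_gpu x pixels_per_image (see docs/source/qa.rst),
    # e.g. 1 x 128 x 16 = 2048 rays with the defaults here.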
flags.DEFINE_integer("pixels_per_image", 16, "pixel samples per image") 66 | # flags.DEFINE_integer("imgs_per_gpu", 1, "size of minibatches per iter") 67 | # flags.DEFINE_integer("pixels_per_image", 4096, "number of pixel samples per image") 68 | flags.DEFINE_boolean( 69 | "freeze_bone_len", False, "do not change bone length of skeleton" 70 | ) 71 | flags.DEFINE_boolean( 72 | "reset_steps", 73 | True, 74 | "reset steps of loss scheduling, set to False if resuming training", 75 | ) 76 | 77 | # efficiency-related 78 | flags.DEFINE_integer("ngpu", 1, "number of gpus to use") 79 | flags.DEFINE_integer("num_workers", 2, "Number of workers for dataloading") 80 | flags.DEFINE_integer("eval_res", 64, "size used for eval visualizations") 81 | flags.DEFINE_integer("save_freq", 10, "params saving frequency") 82 | flags.DEFINE_boolean("profile", False, "profile the training loop") 83 | 84 | 85 | def get_config(): 86 | return opts.flag_values_dict() 87 | 88 | 89 | def save_config(): 90 | save_dir = os.path.join(opts.logroot, "%s-%s" % (opts.seqname, opts.logname)) 91 | os.makedirs(save_dir, exist_ok=True) 92 | opts_path = os.path.join(save_dir, "opts.log") 93 | if os.path.exists(opts_path): 94 | os.remove(opts_path) 95 | opts.append_flags_into_file(opts_path) 96 | -------------------------------------------------------------------------------- /lab4d/config_omega.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | from omegaconf import DictConfig, OmegaConf 3 | 4 | # Define the hierarchical configuration using a dictionary 5 | config = DictConfig( 6 | { 7 | "train": { 8 | "weights": { 9 | "recon": { 10 | "mask_wt": 0.1, # weight for silhouette loss 11 | "rgb_wt": 0.1, # weight for color loss 12 | "depth_wt": 0.01, # weight for depth loss 13 | "flow_wt": 0.5, # weight for flow loss 14 | "vis_wt": 0.01, # weight for visibility loss 15 | "feature_wt": 0.01, # weight for feature reconstruction loss 16 | "feat_reproj_wt": 0.05, # weight for feature reprojection loss 17 | }, 18 | "reg": { 19 | "visibility_wt": 1e-3, # weight for visibility regularization 20 | "eikonal_wt": 1e-5, # weight for eikonal regularization 21 | "deform_cyc_wt": 0.01, # weight for deform cyc regularization 22 | "gauss_skin_wt": 1, # weight for gauss skinning consistency 23 | }, 24 | }, 25 | "model": { 26 | "field_type": "bg", # {bg, fg, comp} 27 | "fg_motion": "rigid", # {rigid, dense, bob, skel} 28 | "single_inst": True, # assume the same morphology over objs 29 | }, 30 | "io": { 31 | "seqname": "cat", # name of the sequence 32 | "logname": "tmp", # name of the saved log 33 | "data_prefix": "full", # prefix of the data entries 34 | "train_res": 256, # size of training images 35 | "logroot": "logdir/", # root directory for log files 36 | "load_suffix": "", # sufix of params, {latest, 0, 10, ...} 37 | "save_freq": 10, # params saving frequency 38 | }, 39 | "optim": { 40 | "learning_rate": 5e-4, # learning rate 41 | "num_rounds": 20, # number of rounds to trainn 42 | "iters_per_round": 200, # number of iterations per round 43 | "imgs_per_gpu": 128, # images samples per iter, per gpu 44 | "pixels_per_image": 16, # pixel samples per image 45 | "ngpu": 1, # number of gpus to use 46 | "num_workers": 2, # number of workers for dataloading 47 | }, 48 | "eval_res": 64, # size used for eval visualizations 49 | "profile": False, # profile the training loop 50 | }, 51 | } 52 | ) 53 | 54 | 55 | def get_config(): 56 | return 
opts.flag_values_dict() 57 | 58 | 59 | def save_config(): 60 | save_dir = os.path.join(opts.logroot, opts.logname) 61 | os.makedirs(save_dir, exist_ok=True) 62 | opts_path = os.path.join(save_dir, "opts.log") 63 | if os.path.exists(opts_path): 64 | os.remove(opts_path) 65 | opts.append_flags_into_file(opts_path) 66 | 67 | 68 | # # Convert the configuration to a dictionary 69 | # config_dict = OmegaConf.to_container(config) 70 | 71 | # # Convert the dictionary back to a configuration 72 | # config2 = OmegaConf.create(config_dict) 73 | -------------------------------------------------------------------------------- /lab4d/dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/dataloader/__init__.py -------------------------------------------------------------------------------- /lab4d/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/engine/__init__.py -------------------------------------------------------------------------------- /lab4d/engine/train_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import os 3 | 4 | import torch 5 | 6 | 7 | def get_local_rank(): 8 | try: 9 | return int(os.environ["LOCAL_RANK"]) 10 | except: 11 | print("LOCAL_RANK not found, set to 0") 12 | return 0 13 | 14 | 15 | class DataParallelPassthrough(torch.nn.parallel.DistributedDataParallel): 16 | """For multi-GPU access, forward attributes to the inner module.""" 17 | 18 | def __getattr__(self, name): 19 | try: 20 | return super().__getattr__(name) 21 | except AttributeError: 22 | return getattr(self.module, name) 23 | 24 | def __delattr__(self, name): 25 | try: 26 | return super().__delattr__(name) 27 | except AttributeError: 28 | return delattr(self.module, name) 29 | -------------------------------------------------------------------------------- /lab4d/nnutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/nnutils/__init__.py -------------------------------------------------------------------------------- /lab4d/nnutils/appearance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | import torch.nn as nn 4 | 5 | from lab4d.nnutils.time import TimeMLP 6 | 7 | 8 | class AppearanceEmbedding(TimeMLP): 9 | """Encode global appearance code over time with an MLP 10 | 11 | Args: 12 | frame_info (Dict): Metadata about the frames in a dataset 13 | appr_channels (int): Number of channels in appearance codes 14 | D (int): Number of linear layers 15 | W (int): Number of hidden units in each MLP layer 16 | num_freq_t (int): Number of frequencies in the time embedding 17 | skips (List(int)): List of layers to add skip connections at 18 | activation (Function): Activation function to use (e.g. 
nn.ReLU()) 19 | """ 20 | 21 | def __init__( 22 | self, 23 | frame_info, 24 | appr_channels, 25 | D=2, 26 | W=64, 27 | num_freq_t=6, 28 | skips=[], 29 | activation=nn.ReLU(True), 30 | time_scale=0.1, 31 | ): 32 | self.appr_channels = appr_channels 33 | # xyz encoding layers 34 | super().__init__( 35 | frame_info, 36 | D=D, 37 | W=W, 38 | num_freq_t=num_freq_t, 39 | skips=skips, 40 | activation=activation, 41 | time_scale=time_scale, 42 | ) 43 | 44 | # output layers 45 | self.output = nn.Linear(W, appr_channels) 46 | 47 | def forward(self, t_embed): 48 | """ 49 | Args: 50 | t: (..., self.W) Input time embeddings 51 | Returns: 52 | out: (..., appr_channels) Output appearance codes 53 | """ 54 | t_feat = super().forward(t_embed) 55 | out = self.output(t_feat) 56 | return out 57 | -------------------------------------------------------------------------------- /lab4d/nnutils/intrinsics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | 6 | from lab4d.nnutils.time import TimeMLP 7 | 8 | 9 | class IntrinsicsMLP(TimeMLP): 10 | """Encode camera intrinsics over time with an MLP 11 | 12 | Args: 13 | intrinsics: (N,4) Camera intrinsics (fx, fy, cx, cy) 14 | frame_info (Dict): Metadata about the frames in a dataset 15 | D (int): Number of linear layers 16 | W (int): Number of hidden units in each MLP layer 17 | num_freq_t (int): Number of frequencies in the time embedding 18 | skips (List(int)): List of layers to add skip connections at 19 | activation (Function): Activation function to use (e.g. nn.ReLU()) 20 | time_scale (float): Control the sensitivity to time by scaling. 21 | Lower values make the module less sensitive to time. 
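    Note: `get_vals` returns per-frame intrinsics in the same (fx, fy, cx, cy) convention
    as the input. The MLP predicts a time-dependent multiplicative correction to a per-video
    base focal length, fx and fy are averaged to keep pixels square, and the principal point
    comes from a per-video parameter (initialized from the given intrinsics) that does not
    vary over time.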
22 | """ 23 | 24 | def __init__( 25 | self, 26 | intrinsics, 27 | frame_info=None, 28 | D=5, 29 | W=256, 30 | num_freq_t=0, 31 | skips=[], 32 | activation=nn.ReLU(True), 33 | time_scale=0.1, 34 | ): 35 | if frame_info is None: 36 | num_frames = len(intrinsics) 37 | frame_info = { 38 | "frame_offset": np.asarray([0, num_frames]), 39 | "frame_mapping": list(range(num_frames)), 40 | "frame_offset_raw": np.asarray([0, num_frames]), 41 | } 42 | # xyz encoding layers 43 | super().__init__( 44 | frame_info, 45 | D=D, 46 | W=W, 47 | num_freq_t=num_freq_t, 48 | skips=skips, 49 | activation=activation, 50 | time_scale=time_scale, 51 | ) 52 | 53 | # output layers 54 | self.focal = nn.Sequential( 55 | nn.Linear(W, W // 2), 56 | activation, 57 | nn.Linear(W // 2, 2), 58 | ) 59 | 60 | # camera intrinsics: fx,fy,px,py 61 | self.base_logfocal = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2)) 62 | self.base_ppoint = nn.Parameter(torch.zeros(self.time_embedding.num_vids, 2)) 63 | self.register_buffer( 64 | "init_vals", torch.tensor(intrinsics, dtype=torch.float32), persistent=False 65 | ) 66 | 67 | def mlp_init(self): 68 | """Initialize camera intrinsics from external values""" 69 | intrinsics = self.init_vals 70 | frame_offset = self.get_frame_offset() 71 | self.base_logfocal.data = intrinsics[frame_offset[:-1], :2].log() 72 | self.base_ppoint.data = intrinsics[frame_offset[:-1], 2:] 73 | super().mlp_init(termination_loss=1.0) 74 | 75 | def forward(self, t_embed): 76 | """ 77 | Args: 78 | t_embed: (..., self.W) Input Fourier time embeddings 79 | Returns: 80 | out: (..., 4) Camera intrinsics 81 | """ 82 | t_feat = super().forward(t_embed) 83 | focal = self.focal(t_feat).exp() 84 | return focal 85 | 86 | def get_vals(self, frame_id=None): 87 | """Compute camera intrinsics at the given frames. 88 | 89 | Args: 90 | frame_id: (...,) Frame id. If None, compute at all frames 91 | Returns: 92 | intrinsics: (..., 4) Output camera intrinsics 93 | """ 94 | t_embed = self.time_embedding(frame_id) 95 | focal = self.forward(t_embed) 96 | if frame_id is None: 97 | inst_id = self.time_embedding.frame_to_vid 98 | else: 99 | inst_id = self.time_embedding.raw_fid_to_vid[frame_id] 100 | base_focal = self.base_logfocal[inst_id].exp() 101 | base_ppoint = self.base_ppoint[inst_id] 102 | focal = focal * base_focal 103 | # force square pixels 104 | focal[..., :] = (focal + focal.flip(-1)) / 2 105 | ppoint = base_ppoint.expand_as(focal) 106 | intrinsics = torch.cat([focal, ppoint], dim=-1) 107 | return intrinsics 108 | -------------------------------------------------------------------------------- /lab4d/nnutils/time.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from lab4d.nnutils.base import BaseMLP 8 | from lab4d.nnutils.embedding import PosEmbedding, TimeEmbedding, get_fourier_embed_dim 9 | 10 | 11 | class TimeMLP(BaseMLP): 12 | """MLP that encodes a quantity over time. 13 | 14 | Args: 15 | frame_info (Dict): Metadata about the frames in a dataset 16 | D (int): Number of linear layers 17 | W (int): Number of hidden units in each MLP layer 18 | num_freq_t (int): Number of frequencies in the time embedding 19 | skips (List(int)): List of layers to add skip connections at 20 | activation (Function): Activation function to use (e.g. 
nn.ReLU()) 21 | time_scale (float): Control the sensitivity to time by scaling. 22 | Lower values make the module less sensitive to time. 23 | """ 24 | 25 | def __init__( 26 | self, 27 | frame_info, 28 | D=5, 29 | W=256, 30 | num_freq_t=6, 31 | skips=[], 32 | activation=nn.ReLU(True), 33 | time_scale=1.0, 34 | ): 35 | frame_offset = frame_info["frame_offset"] 36 | # frame_offset_raw = frame_info["frame_offset_raw"] 37 | if num_freq_t > 0: 38 | max_ts = (frame_offset[1:] - frame_offset[:-1]).max() 39 | # scale according to input frequency: num_frames = 64 -> freq = 6 40 | num_freq_t = np.log2(max_ts / 64) + num_freq_t 41 | # # scale according to input frequency: num_frames = 512 -> freq = 6 42 | # num_freq_t = np.log2(max_ts / 512) + num_freq_t 43 | num_freq_t = int(np.rint(num_freq_t)) 44 | # print("max video len: %d, override num_freq_t to %d" % (max_ts, num_freq_t)) 45 | 46 | super().__init__( 47 | D=D, 48 | W=W, 49 | in_channels=W, 50 | out_channels=W, 51 | skips=skips, 52 | activation=activation, 53 | final_act=True, 54 | ) 55 | 56 | self.time_embedding = TimeEmbedding( 57 | num_freq_t, frame_info, out_channels=W, time_scale=time_scale 58 | ) 59 | 60 | def loss_fn(y): 61 | x = self.get_vals() 62 | return F.mse_loss(x, y) 63 | 64 | self.loss_fn = loss_fn 65 | 66 | def forward(self, t_embed): 67 | """ 68 | Args: 69 | t_embed: (..., self.W) Time Fourier embeddings 70 | Returns: 71 | out: (..., self.W) Time-dependent features 72 | """ 73 | t_feat = super().forward(t_embed) 74 | return t_feat 75 | 76 | def mlp_init(self, loss_fn=None, termination_loss=0.0001): 77 | """Initialize the time embedding MLP to match external priors. 78 | `self.init_vals` is defined by the child class, and could be 79 | (nframes, 4, 4) camera poses or (nframes, 4) camera intrinsics 80 | """ 81 | if loss_fn is None: 82 | loss_fn = self.loss_fn 83 | 84 | optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) 85 | 86 | i = 0 87 | while True: 88 | optimizer.zero_grad() 89 | loss = loss_fn(self.init_vals) 90 | loss.backward() 91 | optimizer.step() 92 | if i % 100 == 0: 93 | print(f"iter: {i}, loss: {loss.item():.4f}") 94 | i += 1 95 | if loss < termination_loss: 96 | break 97 | 98 | def compute_distance_to_prior(self): 99 | """Compute L2-distance from current SE(3) / intrinsics values to 100 | external priors. 101 | 102 | Returns: 103 | loss (0,): Mean squared error to priors 104 | """ 105 | return self.loss_fn(self.init_vals) 106 | 107 | def get_vals(self, frame_id=None): 108 | """Compute values at the given frames. 109 | 110 | Args: 111 | frame_id: (...,) Frame id. If None, evaluate at all frames 112 | Returns: 113 | pred: Predicted outputs 114 | """ 115 | t_embed = self.time_embedding(frame_id) 116 | pred = self.forward(t_embed) 117 | return pred 118 | 119 | def get_mean_vals(self): 120 | """Compute the mean embedding over all frames""" 121 | device = self.parameters().__next__().device 122 | t_embed = self.time_embedding.get_mean_embedding(device) 123 | pred = self.forward(t_embed) 124 | return pred 125 | 126 | def get_frame_offset(self): 127 | """Return the number of frames before the first frame of each video""" 128 | return self.time_embedding.frame_offset 129 | -------------------------------------------------------------------------------- /lab4d/nnutils/visibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
2 | import torch 3 | from torch import nn 4 | 5 | from lab4d.nnutils.base import CondMLP 6 | from lab4d.nnutils.embedding import PosEmbedding 7 | 8 | 9 | class VisField(nn.Module): 10 | """Predict a visibility score (-inf to +inf) for all 3D points 11 | 12 | Args: 13 | num_inst (int): Number of distinct object instances. If --nosingle_inst 14 | is passed, this is equal to the number of videos, as we assume each 15 | video captures a different instance. Otherwise, we assume all videos 16 | capture the same instance and set this to 1. 17 | D (int): Number of linear layers 18 | W (int): Number of hidden units in each MLP layer 19 | num_freq_xyz (int): Number of frequencies in position embedding 20 | inst_channels (int): Number of channels in the instance code 21 | skips (List(int)): List of layers to add skip connections at 22 | activation (Function): Activation function to use (e.g. nn.ReLU()) 23 | """ 24 | 25 | def __init__( 26 | self, 27 | num_inst, 28 | D=2, 29 | W=64, 30 | num_freq_xyz=10, 31 | inst_channels=32, 32 | skips=[4], 33 | activation=nn.ReLU(True), 34 | ): 35 | super().__init__() 36 | 37 | # position and direction embedding 38 | self.pos_embedding = PosEmbedding(3, num_freq_xyz) 39 | 40 | # xyz encoding layers 41 | self.basefield = CondMLP( 42 | num_inst=num_inst, 43 | D=D, 44 | W=W, 45 | in_channels=self.pos_embedding.out_channels, 46 | inst_channels=inst_channels, 47 | out_channels=1, 48 | skips=skips, 49 | activation=activation, 50 | final_act=False, 51 | ) 52 | 53 | def forward(self, xyz, inst_id=None): 54 | """ 55 | Args: 56 | xyz: (..., 3), xyz coordinates 57 | inst_id: (...,) instance id, or None to use the average instance 58 | Returns: 59 | out: (..., 1), visibility score 60 | """ 61 | xyz_embed = self.pos_embedding(xyz) 62 | visibility = self.basefield(xyz_embed, inst_id) 63 | return visibility 64 | -------------------------------------------------------------------------------- /lab4d/reanimate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
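# Reanimate a trained model: load per-frame joint angles (joint_so3), articulations
# (t_articulation) and root poses (field2cam) exported to fg-motion.json, and render
# the model driven by that motion, e.g.: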
2 | # python lab4d/reanimate.py --flagfile=logdir/human-48-dinov2-skel-e120/opts.log --load_suffix latest --motion_id 20 --inst_id 0 3 | 4 | import json 5 | import os 6 | import sys 7 | 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from absl import app, flags 12 | 13 | cwd = os.getcwd() 14 | if cwd not in sys.path: 15 | sys.path.insert(0, cwd) 16 | 17 | from lab4d.config import get_config 18 | from lab4d.render import construct_batch_from_opts, render 19 | from lab4d.utils.profile_utils import torch_profile 20 | from lab4d.utils.quat_transform import se3_to_quaternion_translation 21 | 22 | cudnn.benchmark = True 23 | 24 | 25 | class RenderFlags: 26 | """Flags for the renderer.""" 27 | 28 | flags.DEFINE_integer("motion_id", 0, "motion id") 29 | 30 | 31 | def construct_batch_from_opts_reanimate(opts, model, data_info): 32 | device = "cuda" 33 | # load motion data 34 | motion_path = "%s/%s-%s/export_%04d/fg-motion.json" % ( 35 | opts["logroot"], 36 | opts["seqname"], 37 | opts["logname"], 38 | opts["motion_id"], 39 | ) 40 | with open(motion_path, "r") as fp: 41 | motion_data = json.load(fp) 42 | t_articulation = np.asarray(motion_data["t_articulation"]) 43 | field2cam = np.asarray(motion_data["field2cam"]) 44 | 45 | opts["num_frames"] = len(t_articulation) 46 | 47 | # joint angles 48 | joint_so3 = np.asarray(motion_data["joint_so3"]) 49 | joint_so3 = torch.tensor(joint_so3, dtype=torch.float32, device=device) 50 | 51 | # root pose 52 | field2cam = torch.tensor(field2cam, dtype=torch.float32, device=device) 53 | field2cam = field2cam.reshape(-1, 4, 4) 54 | field2cam = se3_to_quaternion_translation(field2cam, tuple=False) 55 | 56 | batch, raw_size = construct_batch_from_opts(opts, model, data_info) 57 | 58 | batch["joint_so3"] = joint_so3 59 | batch["field2cam"] = {"fg": field2cam} 60 | return batch, raw_size 61 | 62 | 63 | def main(_): 64 | opts = get_config() 65 | render(opts, construct_batch_func=construct_batch_from_opts_reanimate) 66 | 67 | 68 | if __name__ == "__main__": 69 | app.run(main) 70 | -------------------------------------------------------------------------------- /lab4d/tests/hat_map.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | 4 | from lab4d.utils.geom_utils import hat_map, so3_to_exp_map 5 | 6 | 7 | @torch.jit.script 8 | def hat(v: torch.Tensor) -> torch.Tensor: 9 | """ 10 | Compute the Hat operator [1] of a batch of 3D vectors. 11 | 12 | Args: 13 | v: Batch of vectors of shape `(minibatch , 3)`. 14 | 15 | Returns: 16 | Batch of skew-symmetric matrices of shape 17 | `(minibatch, 3 , 3)` where each matrix is of the form: 18 | `[ 0 -v_z v_y ] 19 | [ v_z 0 -v_x ] 20 | [ -v_y v_x 0 ]` 21 | 22 | Raises: 23 | ValueError if `v` is of incorrect shape. 24 | 25 | [1] https://en.wikipedia.org/wiki/Hat_operator 26 | """ 27 | 28 | N, dim = v.shape 29 | if dim != 3: 30 | raise ValueError("Input vectors have to be 3-dimensional.") 31 | 32 | h = torch.zeros((N, 3, 3), dtype=v.dtype, device=v.device) 33 | 34 | x, y, z = v.unbind(1) 35 | 36 | h[:, 0, 1] = -z 37 | h[:, 0, 2] = y 38 | h[:, 1, 0] = z 39 | h[:, 1, 2] = -x 40 | h[:, 2, 0] = -y 41 | h[:, 2, 1] = x 42 | 43 | return h 44 | 45 | 46 | def so3_exp_map(log_rot, eps=0.0001): 47 | """ 48 | A helper function that computes the so3 exponential map and, 49 | apart from the rotation matrix, also returns intermediate variables 50 | that can be re-used in other functions. 
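    The returned rotation follows Rodrigues' formula: with w = log_rot,
    t = ||w|| (clamped away from zero by `eps`) and K = hat(w),

        R = I + (sin(t) / t) * K + ((1 - cos(t)) / t^2) * (K @ K),

    which is exactly what the fac1 and fac2 terms below compute.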
51 | """ 52 | _, dim = log_rot.shape 53 | if dim != 3: 54 | raise ValueError("Input tensor shape has to be Nx3.") 55 | 56 | nrms = (log_rot * log_rot).sum(1) 57 | # phis ... rotation angles 58 | rot_angles = torch.clamp(nrms, eps).sqrt() 59 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 60 | rot_angles_inv = 1.0 / rot_angles 61 | fac1 = rot_angles_inv * rot_angles.sin() 62 | fac2 = rot_angles_inv * rot_angles_inv * (1.0 - rot_angles.cos()) 63 | skews = hat(log_rot) 64 | skews_square = torch.bmm(skews, skews) 65 | 66 | R = ( 67 | fac1[:, None, None] * skews 68 | # pyre-fixme[16]: `float` has no attribute `__getitem__`. 69 | + fac2[:, None, None] * skews_square 70 | + torch.eye(3, dtype=log_rot.dtype, device=log_rot.device)[None] 71 | ) 72 | 73 | return R 74 | 75 | 76 | def test_hat_map(): 77 | # Define a test input tensor 78 | v = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32) 79 | # Compute the skew-symmetric matrices using the hat_map function 80 | V = hat_map(v) 81 | # Verify that the output has the correct shape 82 | assert V.shape == (3, 3, 3) 83 | # Verify that the output is correct 84 | expected_V = torch.tensor( 85 | [ 86 | [[0, -3, 2], [3, 0, -1], [-2, 1, 0]], 87 | [[0, -6, 5], [6, 0, -4], [-5, 4, 0]], 88 | [[0, -9, 8], [9, 0, -7], [-8, 7, 0]], 89 | ], 90 | dtype=torch.float32, 91 | ) 92 | if not torch.allclose(V, expected_V): 93 | print("Computed output:") 94 | print(V) 95 | print("Expected output:") 96 | print(expected_V) 97 | assert torch.allclose(V, expected_V) 98 | 99 | 100 | def test_so3_to_exp_map(): 101 | so3 = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) 102 | exp_map = so3_exp_map(so3) 103 | computed_exp_map = so3_to_exp_map(so3) 104 | if not torch.allclose(computed_exp_map, exp_map): 105 | print("Computed output:") 106 | print(computed_exp_map) 107 | print("Expected output:") 108 | print(exp_map) 109 | 110 | 111 | test_so3_to_exp_map() 112 | test_hat_map() 113 | -------------------------------------------------------------------------------- /lab4d/tests/test_gpu_map.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 
2 | import numpy as np 3 | import torch 4 | 5 | from lab4d.tests.utils import check_func 6 | from lab4d.utils.gpu_utils import gpu_map 7 | 8 | 9 | def func(arg1, arg2): 10 | x = torch.ones(arg1, arg2, dtype=torch.int64, device="cuda") 11 | return int(torch.sum(x)) 12 | 13 | 14 | def test_gpu_map_static(n_elts): 15 | """Test utils/proc_utils.py::gpu_map_static""" 16 | 17 | def impl1(n_elts): 18 | return [(i + 1) * (i + 2) for i in range(n_elts)] 19 | 20 | def impl2(n_elts): 21 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="static") 22 | 23 | check_func(impl1, impl2, (n_elts,), name="gpu_map_static", niters=1) 24 | 25 | 26 | def test_gpu_map_dynamic(n_elts): 27 | """Test utils/proc_utils.py::gpu_map_dynamic""" 28 | 29 | def impl1(n_elts): 30 | return [(i + 1) * (i + 2) for i in range(n_elts)] 31 | 32 | def impl2(n_elts): 33 | return gpu_map(func, [(x + 1, x + 2) for x in range(n_elts)], method="dynamic") 34 | 35 | check_func(impl1, impl2, (n_elts,), name="gpu_map_dynamic", niters=1) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_gpu_map_static(11) 40 | # test_gpu_map_dynamic(11) 41 | -------------------------------------------------------------------------------- /lab4d/tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 2 | import time 3 | from statistics import mean, stdev 4 | 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def check_func(func1, func2, args=(), name="", niters=100, rtol=None, atol=None): 10 | """Verify that both input functions produce identical outputs 11 | 12 | Args: 13 | func1: First function to test 14 | func2: Second function to test 15 | args: Arguments to both functions 16 | name: Name of this test 17 | niters: Number of test iterations (default 5) 18 | rtol: Relative tolerance (by default, selected based on datatype) 19 | atol: Absolute tolerance (by default, selected based on datatype) 20 | """ 21 | # Make sure cuda is already loaded 22 | torch.zeros(1, dtype=torch.float32, device="cuda") 23 | 24 | all_t1 = [] 25 | all_t2 = [] 26 | for i in range(niters): 27 | torch.cuda.synchronize() 28 | t1 = time.time() 29 | out1 = func1(*args) 30 | torch.cuda.synchronize() 31 | all_t1.append(time.time() - t1) 32 | 33 | torch.cuda.synchronize() 34 | t2 = time.time() 35 | out2 = func2(*args) 36 | torch.cuda.synchronize() 37 | all_t2.append(time.time() - t2) 38 | 39 | try: 40 | assert type(out1) == type(out2) 41 | if isinstance(out1, torch.Tensor) and isinstance(out2, torch.Tensor): 42 | torch.testing.assert_close(out1, out2, rtol=rtol, atol=atol) 43 | elif isinstance(out1, np.ndarray) and isinstance(out2, np.ndarray): 44 | np.testing.assert_allclose(out1, out2, rtol=rtol, atol=atol) 45 | else: 46 | assert all( 47 | elt1 == elt2 for elt1, elt2 in zip(out1, out2) 48 | ), f"out1={out1} but out2={out2}" 49 | except Exception as e: 50 | print(f"Error: {e}") 51 | 52 | all_t1 = all_t1[10:] # Remove the first few iterations to account for warmup 53 | all_t2 = all_t2[10:] 54 | avg_t1 = 1000 * mean(all_t1) # milliseconds 55 | avg_t2 = 1000 * mean(all_t2) 56 | std_t1 = 1000 * stdev(all_t1) if len(all_t1) > 1 else 0 57 | std_t2 = 1000 * stdev(all_t2) if len(all_t1) > 1 else 0 58 | 59 | print( 60 | f"Test '{name}' passed:\tavg_t1={avg_t1:.2f}ms,\tavg_t2={avg_t2:.2f}ms," 61 | f"\tstd_t1={std_t1:.2f}ms,\tstd_t2={std_t2:.2f}ms" 62 | ) 63 | -------------------------------------------------------------------------------- 
/lab4d/third_party/quaternion/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | quaternion.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/README.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | The code is released as [dqtorch](https://github.com/MightyChaos/dqtorch). 3 | Please refer to the repository for tutorials and general use cases. 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/__init__.py: -------------------------------------------------------------------------------- 1 | ## Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | from .mat3x3 import mat3x3_inv 3 | from .quaternion import quaternion_conjugate, quaternion_mul 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/add_gcc_cuda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | module add gcc-6.3.0 3 | module add cuda-11.1.1 4 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import os 3 | 4 | from torch.utils.cpp_extension import load 5 | 6 | _src_path = os.path.dirname(os.path.abspath(__file__)) 7 | 8 | nvcc_flags = [ 9 | '-O3', '-std=c++14', 10 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 11 | ] 12 | 13 | if os.name == "posix": 14 | c_flags = ['-O3', '-std=c++14'] 15 | elif os.name == "nt": 16 | c_flags = ['/O2', '/std:c++17'] 17 | 18 | # find cl.exe 19 | def find_cl_path(): 20 | import glob 21 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 22 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 23 | if paths: 24 | return paths[0] 25 | 26 | # If cl.exe is not on path, try to find it. 27 | if os.system("where cl.exe >nul 2>nul") != 0: 28 | cl_path = find_cl_path() 29 | if cl_path is None: 30 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 31 | os.environ["PATH"] += ";" + cl_path 32 | 33 | _backend = load(name='_quaternion', 34 | extra_cflags=c_flags, 35 | extra_cuda_cflags=nvcc_flags, 36 | sources=[os.path.join(_src_path, 'src', f) for f in [ 37 | 'quaternion.cu', 38 | 'matinv.cu', 39 | 'bindings.cpp', 40 | ]], 41 | ) 42 | 43 | __all__ = ['_backend'] -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/mat3x3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 
2 | import torch 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.cuda.amp import custom_bwd, custom_fwd 6 | 7 | try: 8 | import _quaternion as _backend 9 | except ImportError: 10 | from .backend import _backend 11 | 12 | 13 | class _Mat3x3_det(Function): 14 | @staticmethod 15 | @custom_fwd(cast_inputs=torch.float) 16 | def forward(ctx, inputs:torch.Tensor): 17 | B = inputs.shape[0] 18 | assert(inputs.shape[1] == 9) 19 | dtype = inputs.dtype 20 | device = inputs.device 21 | 22 | outputs = torch.empty(B, dtype=dtype, device=device) 23 | 24 | _backend.mat3x3_det_forward(inputs, outputs, B) 25 | ctx.save_for_backward(inputs) 26 | 27 | return outputs 28 | 29 | @staticmethod 30 | @once_differentiable 31 | @custom_bwd 32 | def backward(ctx, grad): 33 | return None 34 | 35 | _mat3x3_det = _Mat3x3_det.apply 36 | def mat3x3_det(inputs:torch.Tensor): 37 | rt_size = inputs.shape[:-2] 38 | outputs = _mat3x3_det(inputs.contiguous().view(-1,9)) 39 | return outputs.view(rt_size) 40 | 41 | 42 | class _Mat3x3_scale_adjoint(Function): 43 | @staticmethod 44 | @custom_fwd(cast_inputs=torch.half) 45 | def forward(ctx, inputs:torch.Tensor, scales:torch.Tensor): 46 | B = inputs.shape[0] 47 | assert(inputs.shape[1] == 9) 48 | dtype = inputs.dtype 49 | device = inputs.device 50 | outputs = torch.empty(B, 9, dtype=dtype, device=device) 51 | _backend.mat3x3_scale_adjoint_forward(inputs, scales, outputs, B) 52 | ctx.save_for_backward(inputs, scales) 53 | return outputs 54 | 55 | @staticmethod 56 | @once_differentiable 57 | @custom_bwd 58 | def backward(ctx, *grad_outputs): 59 | return None 60 | 61 | _mat3x3_scale_adjoint = _Mat3x3_scale_adjoint.apply 62 | def mat3x3_scale_adjoint(inputs:torch.Tensor, scales:torch.Tensor): 63 | rt_size = inputs.shape 64 | outputs = _mat3x3_scale_adjoint(inputs.contiguous().view(-1,9), scales.contiguous().view(-1)) 65 | return outputs.view(rt_size) 66 | 67 | 68 | class _Mat3x3_inv(Function): 69 | @staticmethod 70 | @custom_fwd(cast_inputs=torch.float) 71 | def forward(ctx, inputs:torch.Tensor): 72 | B = inputs.shape[0] 73 | assert(inputs.shape[1] == 9) 74 | dtype = inputs.dtype 75 | device = inputs.device 76 | scales = torch.empty(B, dtype=dtype, device=device) 77 | outputs = torch.empty(B, 9, dtype=dtype, device=device) 78 | _backend.mat3x3_inv_forward(inputs, outputs, scales, B) 79 | ctx.save_for_backward(outputs, scales) 80 | # print(scales) 81 | return outputs 82 | 83 | @staticmethod 84 | @once_differentiable 85 | @custom_bwd 86 | def backward(ctx, grad): 87 | inv_mats, _ = ctx.saved_tensors 88 | B = inv_mats.shape[0] 89 | assert(inv_mats.shape[1] == 9) 90 | dtype = inv_mats.dtype 91 | device = inv_mats.device 92 | grad_inputs = torch.empty(B, 9, dtype=dtype, device=device) 93 | _backend.mat3x3_inv_backward(grad, inv_mats, grad_inputs, B) 94 | return grad_inputs 95 | 96 | 97 | 98 | _mat3x3_inv = _Mat3x3_inv.apply 99 | def mat3x3_inv(inputs:torch.Tensor): 100 | rt_size = inputs.shape 101 | outputs = _mat3x3_inv(inputs.contiguous().view(-1,9)) 102 | return outputs.view(rt_size) 103 | 104 | def _test_mat3x3_inv_backward(x:torch.Tensor): 105 | x_inv = mat3x3_inv(x) 106 | loss = x_inv.mean() 107 | loss.backward() 108 | 109 | def _test(): 110 | import torch.utils.benchmark as benchmark 111 | N = 4096*128 112 | # N = 100 113 | x = torch.randn(N, 3, 3, requires_grad=True).float().cuda() 114 | x_det = mat3x3_det(x) 115 | 116 | # torch.autograd.gradcheck(mat3x3_inv, x) 117 | 118 | T = 100 119 | t = benchmark.Timer( 120 | 
stmt='mat3x3_det(x)', 121 | setup='from __main__ import mat3x3_det', 122 | globals={'x': x}) 123 | print(t.timeit(T)) 124 | 125 | x_adj = mat3x3_scale_adjoint(x, x_det) 126 | T = 100 127 | t = benchmark.Timer( 128 | stmt='mat3x3_scale_adjoint(x, x_det)', 129 | setup='from __main__ import mat3x3_scale_adjoint', 130 | globals={'x': x, 'x_det':x_det}) 131 | print(t.timeit(T)) 132 | 133 | # check correctness 134 | print(x @ x_adj) 135 | 136 | x_inv = mat3x3_inv(x) 137 | print(x @ x_inv) 138 | T = 100 139 | t = benchmark.Timer( 140 | stmt='mat3x3_inv(x)', 141 | setup='from __main__ import mat3x3_inv', 142 | globals={'x': x}) 143 | print(t.timeit(T)) 144 | 145 | T = 100 146 | t = benchmark.Timer( 147 | stmt='_test_mat3x3_inv_backward(x)', 148 | setup='from __main__ import _test_mat3x3_inv_backward', 149 | globals={'x': x}) 150 | print(t.timeit(T)) 151 | 152 | 153 | if __name__ == '__main__': 154 | _test() 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/quaternion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import torch 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.cuda.amp import custom_bwd, custom_fwd 6 | 7 | try: 8 | import _quaternion as _backend 9 | except ImportError: 10 | from .backend import _backend 11 | 12 | class _Quaternion_mul_backward(Function): 13 | @staticmethod 14 | @custom_fwd(cast_inputs=torch.half) 15 | def forward(ctx, grad, inputs_1, inputs_2): 16 | B = inputs_1.shape[0] # batch size, coord dim 17 | D1 = inputs_1.shape[1] 18 | D2 = inputs_2.shape[1] 19 | dtype, device = inputs_1.dtype, inputs_1.device 20 | grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype) 21 | grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype) 22 | _backend.quaternion_mul_backward(grad, B, D1, D2, inputs_1, inputs_2, grad_inputs_1, grad_inputs_2) 23 | ctx.save_for_backward(grad, inputs_1, inputs_2) 24 | return grad_inputs_1, grad_inputs_2 25 | 26 | @staticmethod 27 | @once_differentiable 28 | @custom_bwd 29 | def backward(ctx, *grad_outputs): 30 | grad_out_1, grad_out_2 = grad_outputs 31 | grad, inputs_1, inputs_2 = ctx.saved_tensors 32 | B = inputs_1.shape[0] # batch size, coord dim 33 | D1 = inputs_1.shape[1] 34 | D2 = inputs_2.shape[1] 35 | dtype, device = inputs_1.dtype, inputs_1.device 36 | grad_grad = torch.empty(B, 4, device=device, dtype=dtype) 37 | grad_grad_inputs_1 = torch.empty(B, D1, device=device, dtype=dtype) 38 | grad_grad_inputs_2 = torch.empty(B, D2, device=device, dtype=dtype) 39 | _backend.quaternion_mul_backward_backward(grad_out_1, grad_out_2, 40 | B, D1, D2, 41 | grad, inputs_1, inputs_2, 42 | grad_grad, grad_grad_inputs_1, grad_grad_inputs_2) 43 | return grad_grad, grad_grad_inputs_1, grad_grad_inputs_2 44 | 45 | _quaternion_mul_backward = _Quaternion_mul_backward.apply 46 | 47 | class _Quaternion_mul(Function): 48 | @staticmethod 49 | @custom_fwd(cast_inputs=torch.half) 50 | def forward(ctx, inputs_1:torch.Tensor, inputs_2:torch.Tensor): 51 | # inputs: [B, input_dim], float in [-1, 1] 52 | # RETURN: [B, F], float 53 | calc_grad_inputs = inputs_1.requires_grad or inputs_2.requires_grad 54 | 55 | inputs_1 = inputs_1.contiguous() 56 | inputs_2 = inputs_2.contiguous() 57 | 58 | B = inputs_1.shape[0] # batch size, coord dim 59 | D1 = inputs_1.shape[1] 60 | D2 = inputs_2.shape[1] 61 | 62 | dtype = 
inputs_1.dtype 63 | device = inputs_1.device 64 | 65 | outputs = torch.empty(B, 4, dtype=dtype, device=device) 66 | 67 | 68 | _backend.quaternion_mul_forward(inputs_1, inputs_2, outputs, B, D1, D2) 69 | 70 | ctx.save_for_backward(inputs_1, inputs_2) 71 | 72 | 73 | return outputs 74 | 75 | @staticmethod 76 | @custom_bwd 77 | def backward(ctx, grad): 78 | # grad: [B, C * C] 79 | 80 | grad = grad.contiguous() 81 | inputs_1, inputs_2 = ctx.saved_tensors 82 | 83 | grad_inputs_1, grad_inputs_2 = _quaternion_mul_backward(grad, inputs_1, inputs_2) 84 | 85 | return grad_inputs_1, grad_inputs_2 86 | # else: 87 | # return None, None 88 | 89 | 90 | 91 | quaternion_mul = _Quaternion_mul.apply 92 | 93 | 94 | class _Quaternion_conjugate(torch.autograd.Function): 95 | @staticmethod 96 | @custom_fwd(cast_inputs=torch.half) 97 | def forward(ctx, inputs:torch.Tensor): 98 | B = inputs.shape[0] # batch size, coord dim 99 | outputs = torch.empty_like(inputs) 100 | _backend.quaternion_conjugate(inputs.contiguous(), B, outputs) 101 | return outputs 102 | 103 | @staticmethod 104 | @custom_bwd 105 | def backward(ctx, grad): 106 | return _Quaternion_conjugate.apply(grad) 107 | 108 | 109 | quaternion_conjugate = _Quaternion_conjugate.apply 110 | 111 | -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | import os 3 | 4 | from setuptools import setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | _src_path = os.path.dirname(os.path.abspath(__file__)) 8 | 9 | nvcc_flags = [ 10 | '-O3', '-std=c++14', 11 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 12 | ] 13 | 14 | if os.name == "posix": 15 | c_flags = ['-O3', '-std=c++14'] 16 | elif os.name == "nt": 17 | c_flags = ['/O2', '/std:c++17'] 18 | 19 | # find cl.exe 20 | def find_cl_path(): 21 | import glob 22 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 23 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 24 | if paths: 25 | return paths[0] 26 | 27 | # If cl.exe is not on path, try to find it. 28 | if os.system("where cl.exe >nul 2>nul") != 0: 29 | cl_path = find_cl_path() 30 | if cl_path is None: 31 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 32 | os.environ["PATH"] += ";" + cl_path 33 | 34 | setup( 35 | name='quaternion', # package name, import this to use python API 36 | ext_modules=[ 37 | CUDAExtension( 38 | name='_quaternion', # extension name, import this to use CUDA API 39 | sources=[os.path.join(_src_path, 'src', f) for f in [ 40 | 'quaternion.cu', 41 | 'matinv.cu', 42 | 'bindings.cpp', 43 | ]], 44 | extra_compile_args={ 45 | 'cxx': c_flags, 46 | 'nvcc': nvcc_flags, 47 | } 48 | ), 49 | ], 50 | cmdclass={ 51 | 'build_ext': BuildExtension, 52 | } 53 | ) -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 
2 | #include 3 | 4 | #include "quaternion.h" 5 | #include "matinv.h" 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("quaternion_mul_forward", &quaternion_mul_forward, "quaternion multiplication forward (CUDA)"); 9 | m.def("quaternion_mul_backward", &quaternion_mul_backward, "quaternion multiplication backward (CUDA)"); 10 | m.def("quaternion_mul_backward_backward", &quaternion_mul_backward_backward, "quaternion multiplication backward (CUDA)"); 11 | m.def("quaternion_conjugate", &quaternion_conjugate, "quaternion_conjugate (CUDA)"); 12 | // mat3x3 inverse 13 | m.def("mat3x3_det_forward", &mat3x3_det_forward, "mat3x3_det_forward (CUDA)"); 14 | m.def("mat3x3_scale_adjoint_forward", &mat3x3_scale_adjoint_forward, "mat3x3_scale_adjoint_forward (CUDA)"); 15 | m.def("mat3x3_inv_forward", &mat3x3_inv_forward, "mat3x3_inv_forward (CUDA)"); 16 | m.def("mat3x3_inv_backward", &mat3x3_inv_backward, "mat3x3_inv_backward (CUDA)"); 17 | } -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/matinv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | # pragma once 3 | 4 | #include 5 | #include 6 | 7 | void mat3x3_det_forward(at::Tensor inputs, at::Tensor outputs,const uint32_t B); 8 | void mat3x3_scale_adjoint_forward(at::Tensor inputs, at::Tensor scales, at::Tensor outputs, const uint32_t B); 9 | void mat3x3_inv_forward(at::Tensor inputs, at::Tensor outputs, at::Tensor output_scales, const uint32_t B); 10 | 11 | void mat3x3_inv_backward(at::Tensor grad, at::Tensor inv_mats, at::Tensor grad_inputs, const uint32_t B); -------------------------------------------------------------------------------- /lab4d/third_party/quaternion/src/quaternion.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Chaoyang Wang, Carnegie Mellon University. 2 | # pragma once 3 | 4 | #include 5 | #include 6 | 7 | // inputs: [B, D], float, in [-1, 1] 8 | // outputs: [B, F], float 9 | 10 | // encode_forward(inputs, outputs, B, input_dim, degree, calc_grad_inputs, dy_dx) 11 | void quaternion_mul_forward(at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor outputs, const uint32_t B, const uint32_t D1, const uint32_t D2); 12 | 13 | // sh_encode_backward(grad, inputs, B, input_dim, degree, ctx.calc_grad_inputs, dy_dx, grad_inputs) 14 | void quaternion_mul_backward(at::Tensor grad, const uint32_t B, const uint32_t D1, const uint32_t D2, at::Tensor inputs_1, at::Tensor inputs_2, at::Tensor grad_inputs_1, at::Tensor grad_inputs_2); 15 | 16 | 17 | void quaternion_mul_backward_backward( 18 | at::Tensor grad_out_1, at::Tensor grad_out_2, 19 | const uint32_t B, const uint32_t D1, const uint32_t D2, 20 | at::Tensor grad, at::Tensor inputs_1, at::Tensor inputs_2, 21 | at::Tensor grad_grad, at::Tensor grad_grad_inputs_1, at::Tensor grad_grad_inputs_2); 22 | 23 | 24 | void quaternion_conjugate(at::Tensor inputs, const uint32_t B, at::Tensor outputs); -------------------------------------------------------------------------------- /lab4d/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
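A small usage sketch for the quaternion extension defined above may help; it is illustrative only. It assumes the CUDA extension has been built (for example via the setup.py shown earlier, or on the fly through backend.py), that a GPU is available, and that quaternions are stored as rows of 4 values.

import torch
import torch.nn.functional as F

from lab4d.third_party.quaternion.quaternion import (
    quaternion_conjugate,
    quaternion_mul,
)

# random unit quaternions, one per batch element
q1 = F.normalize(torch.randn(8, 4, device="cuda"), dim=-1).requires_grad_(True)
q2 = F.normalize(torch.randn(8, 4, device="cuda"), dim=-1)

q12 = quaternion_mul(q1, q2)         # (8, 4) composed rotations
q12_inv = quaternion_conjugate(q12)  # (8, 4) conjugate; the inverse for unit quaternions

# gradients flow through the custom autograd Functions; a second backward pass
# is also supported via _Quaternion_mul_backward above
q12.sum().backward()
print(q1.grad.shape)  # torch.Size([8, 4])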
2 | import os 3 | import sys 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | from absl import app 8 | 9 | cwd = os.getcwd() 10 | if cwd not in sys.path: 11 | sys.path.insert(0, cwd) 12 | 13 | from lab4d.config import get_config, save_config 14 | from lab4d.engine.train_utils import get_local_rank 15 | from lab4d.utils.profile_utils import record_function 16 | 17 | cudnn.benchmark = True 18 | 19 | 20 | def train_ddp(Trainer): 21 | local_rank = get_local_rank() 22 | torch.cuda.set_device(local_rank) 23 | 24 | opts = get_config() 25 | if local_rank == 0: 26 | save_config() 27 | 28 | torch.distributed.init_process_group( 29 | "nccl", 30 | init_method="env://", 31 | world_size=opts["ngpu"], 32 | rank=local_rank, 33 | ) 34 | 35 | # torch.manual_seed(0) 36 | # torch.cuda.manual_seed(1) 37 | # torch.manual_seed(0) 38 | 39 | trainer = Trainer(opts) 40 | trainer.train() 41 | 42 | 43 | def main(_): 44 | from lab4d.engine.trainer import Trainer 45 | 46 | train_ddp(Trainer) 47 | 48 | 49 | if __name__ == "__main__": 50 | app.run(main) 51 | -------------------------------------------------------------------------------- /lab4d/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/lab4d/utils/__init__.py -------------------------------------------------------------------------------- /lab4d/utils/cam_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # TODO: move camera-related utils to here 3 | -------------------------------------------------------------------------------- /lab4d/utils/decorator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | from functools import wraps 3 | 4 | 5 | def train_only_fields(method): 6 | """Decorator to skip the method and return an empty field list if not in 7 | training mode. 8 | """ 9 | 10 | @wraps(method) 11 | def _impl(self, *method_args, **method_kwargs): 12 | if self.training: 13 | return method(self, *method_args, **method_kwargs) 14 | else: 15 | return {} 16 | 17 | return _impl 18 | -------------------------------------------------------------------------------- /lab4d/utils/gpu_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Jeff Tan, Carnegie Mellon University. 2 | import multiprocessing 3 | import os 4 | 5 | 6 | def gpu_map(func, args, gpus=None, method="static"): 7 | """Map a function over GPUs 8 | 9 | Args: 10 | func (Function): Function to parallelize 11 | args (List(Tuple)): List of argument tuples, to split evenly over GPUs 12 | gpus (List(int) or None): Optional list of GPU device IDs to use 13 | method (str): Either "static" or "dynamic" (default "static"). 14 | Static assignment is the fastest if workload per task is balanced; 15 | dynamic assignment better handles tasks with uneven workload. 
16 | Returns: 17 | outs (List): List of outputs 18 | """ 19 | mp = multiprocessing.get_context("spawn") # spawn allows CUDA usage 20 | devices = os.getenv("CUDA_VISIBLE_DEVICES") 21 | outputs = None 22 | 23 | # Compute list of GPUs 24 | if gpus is None: 25 | if devices is None: 26 | num_gpus = int(os.popen("nvidia-smi -L | wc -l").read()) 27 | gpus = list(range(num_gpus)) 28 | else: 29 | gpus = [int(n) for n in devices.split(",")] 30 | 31 | # Map arguments over GPUs using static or dynamic assignment 32 | try: 33 | if method == "static": 34 | # Interleave arguments across GPUs 35 | args_by_rank = [[] for rank in range(len(gpus))] 36 | for it, arg in enumerate(args): 37 | args_by_rank[it % len(gpus)].append(arg) 38 | 39 | # Spawn processes 40 | spawned_procs = [] 41 | result_queue = mp.Queue() 42 | for rank, gpu_id in enumerate(gpus): 43 | # Environment variables get copied on process creation 44 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) 45 | proc_args = (func, args_by_rank[rank], rank, result_queue) 46 | proc = mp.Process(target=gpu_map_static_helper, args=proc_args) 47 | proc.start() 48 | spawned_procs.append(proc) 49 | 50 | # Wait to finish 51 | for proc in spawned_procs: 52 | proc.join() 53 | 54 | # Construct output list 55 | outputs_by_rank = {} 56 | while True: 57 | try: 58 | rank, out = result_queue.get(block=False) 59 | outputs_by_rank[rank] = out 60 | except multiprocessing.queues.Empty: 61 | break 62 | 63 | outputs = [] 64 | for it in range(len(args)): 65 | rank = it % len(gpus) 66 | idx = it // len(gpus) 67 | outputs.append(outputs_by_rank[rank][idx]) 68 | 69 | elif method == "dynamic": 70 | gpu_queue = mp.Queue() 71 | for gpu_id in gpus: 72 | gpu_queue.put(gpu_id) 73 | 74 | # Spawn processes as GPUs become available 75 | spawned_procs = [] 76 | result_queue = mp.Queue() 77 | for it, arg in enumerate(args): 78 | # Take latest available gpu_id (blocking) 79 | gpu_id = gpu_queue.get() 80 | 81 | # Environment variables get copied on process creation 82 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) 83 | proc_args = (func, arg, it, gpu_id, result_queue, gpu_queue) 84 | proc = mp.Process(target=gpu_map_dynamic_helper, args=proc_args) 85 | proc.start() 86 | spawned_procs.append(proc) 87 | 88 | # Wait to finish 89 | for proc in spawned_procs: 90 | proc.join() 91 | 92 | # Construct output list 93 | outputs_by_it = {} 94 | while True: 95 | try: 96 | it, out = result_queue.get(block=False) 97 | outputs_by_it[it] = out 98 | except multiprocessing.queues.Empty: 99 | break 100 | 101 | outputs = [] 102 | for it in range(len(args)): 103 | outputs.append(outputs_by_it[it]) 104 | 105 | else: 106 | raise NotImplementedError 107 | 108 | except Exception as e: 109 | pass 110 | 111 | # Restore env vars 112 | finally: 113 | if devices is not None: 114 | os.environ["CUDA_VISIBLE_DEVICES"] = devices 115 | else: 116 | del os.environ["CUDA_VISIBLE_DEVICES"] 117 | return outputs 118 | 119 | 120 | def gpu_map_static_helper(func, args, rank, result_queue): 121 | out = [func(*arg) for arg in args] 122 | result_queue.put((rank, out)) 123 | 124 | 125 | def gpu_map_dynamic_helper(func, arg, it, gpu_id, result_queue, gpu_queue): 126 | out = func(*arg) 127 | gpu_queue.put(gpu_id) 128 | result_queue.put((it, out)) 129 | -------------------------------------------------------------------------------- /lab4d/utils/io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
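A minimal usage sketch for gpu_map follows (not part of the original file). It assumes at least one visible CUDA device and must run under an `if __name__ == "__main__":` guard because workers are spawned as separate processes; the worker function and its arguments are hypothetical.

import torch

from lab4d.utils.gpu_utils import gpu_map


def scale_on_gpu(value, scale):
    # hypothetical per-task workload; CUDA_VISIBLE_DEVICES is set per worker by gpu_map
    x = torch.tensor(value, device="cuda") * scale
    return x.item()


if __name__ == "__main__":
    tasks = [(i, 2.0) for i in range(8)]  # list of argument tuples, one per task
    # "static" interleaves tasks over GPUs; "dynamic" hands each task to the next free GPU
    results = gpu_map(scale_on_gpu, tasks, method="static")
    print(results)  # eight results, returned in the same order as `tasks`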
2 | import glob 3 | import os 4 | 5 | import cv2 6 | import einops 7 | import imageio 8 | import numpy as np 9 | 10 | from lab4d.utils.vis_utils import img2color, make_image_grid 11 | 12 | 13 | def make_save_dir(opts, sub_dir="renderings"): 14 | """Create a subdirectory to save outputs 15 | 16 | Args: 17 | opts (Dict): Command-line options 18 | sub_dir (str): Subdirectory to create 19 | Returns: 20 | save_dir (str): Output directory 21 | """ 22 | logname = "%s-%s" % (opts["seqname"], opts["logname"]) 23 | save_dir = "%s/%s/%s/" % (opts["logroot"], logname, sub_dir) 24 | os.makedirs(save_dir, exist_ok=True) 25 | return save_dir 26 | 27 | 28 | def save_vid( 29 | outpath, 30 | frames, 31 | suffix=".mp4", 32 | upsample_frame=0, 33 | fps=10, 34 | target_size=None, 35 | ): 36 | """Save frames to video 37 | 38 | Args: 39 | outpath (str): Output directory 40 | frames: (N, H, W, x) Frames to output 41 | suffix (str): File type to save (".mp4" or ".gif") 42 | upsample_frame (int): Target number of frames 43 | fps (int): Target frames per second 44 | target_size: If provided, (H, W) target size of frames 45 | """ 46 | # convert to 150 frames 47 | if upsample_frame < 1: 48 | upsample_frame = len(frames) 49 | frame_150 = [] 50 | for i in range(int(upsample_frame)): 51 | fid = int(i / upsample_frame * len(frames)) 52 | frame = frames[fid] 53 | if frame.max() <= 1: 54 | frame = frame * 255 55 | frame = frame.astype(np.uint8) 56 | if target_size is not None: 57 | frame = cv2.resize(frame, target_size[::-1]) 58 | if suffix == ".gif": 59 | h, w = frame.shape[:2] 60 | fxy = np.sqrt(4e4 / (h * w)) 61 | frame = cv2.resize(frame, None, fx=fxy, fy=fxy) 62 | 63 | # resize to make divisible by marco block size = 16 64 | h, w = frame.shape[:2] 65 | h = int(np.ceil(h / 16) * 16) 66 | w = int(np.ceil(w / 16) * 16) 67 | frame = cv2.resize(frame, (w, h)) 68 | 69 | frame_150.append(frame) 70 | imageio.mimsave("%s%s" % (outpath, suffix), frame_150, fps=fps) 71 | 72 | 73 | def save_rendered(rendered, save_dir, raw_size, pca_fn): 74 | """Save rendered outputs 75 | 76 | Args: 77 | rendered (Dict): Maps arbitrary keys to outputs of shape (N, H, W, x) 78 | save_dir (str): Output directory 79 | raw_size: (2,) Target height and width 80 | pca_fn (Function): Function to apply PCA on feature outputs 81 | """ 82 | # save rendered images 83 | for k, v in rendered.items(): 84 | n, h, w = v.shape[:3] 85 | img_grid = make_image_grid(v) 86 | img_grid = img2color(k, img_grid, pca_fn=pca_fn) 87 | img_grid = (img_grid * 255).astype(np.uint8) 88 | # cv2.imwrite("%s/%s.jpg" % (save_dir, k), img_grid[:, :, ::-1]) 89 | 90 | # save video 91 | frames = einops.rearrange(img_grid, "(m h) (n w) c -> (m n) h w c", h=h, w=w) 92 | frames = frames[:n] 93 | save_vid( 94 | "%s/%s" % (save_dir, k), 95 | frames, 96 | fps=30, 97 | target_size=(raw_size[0], raw_size[1]), 98 | ) 99 | -------------------------------------------------------------------------------- /lab4d/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import torch 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | 7 | def entropy_loss(prob, dim=-1): 8 | """Compute entropy of a probability distribution 9 | In the case of skinning weights, each column is a distribution over assignment to B bones. 10 | We want to encourage low entropy, i.e. each point is assigned to fewer bones. 
11 | 12 | Args: 13 | prob: (..., B) Probability distribution 14 | Returns: 15 | entropy (...,) Entropy of each distribution 16 | """ 17 | entropy = -(prob * (prob + 1e-9).log()).sum(dim) 18 | return entropy 19 | 20 | 21 | def cross_entropy_skin_loss(skin): 22 | """Compute entropy of a probability distribution 23 | In the case of skinning weights, each column is a distribution over assignment to B bones. 24 | We want to encourage low entropy, i.e. each point is assigned to fewer bones. 25 | 26 | Args: 27 | skin: (..., B) un-normalized skinning weights 28 | """ 29 | shape = skin.shape 30 | nbones = shape[-1] 31 | full_skin = skin.clone() 32 | 33 | # find the most likely bone assignment 34 | score, indices = skin.max(-1, keepdim=True) 35 | skin = torch.zeros_like(skin).fill_(0) 36 | skin = skin.scatter(-1, indices, torch.ones_like(score)) 37 | 38 | cross_entropy = F.cross_entropy( 39 | full_skin.view(-1, nbones), skin.view(-1, nbones), reduction="none" 40 | ) 41 | cross_entropy = cross_entropy.view(shape[:-1]) 42 | return cross_entropy 43 | 44 | 45 | def align_vectors(v1, v2): 46 | """Return the scale that best aligns v1 to v2 in the L2 sense: 47 | min || kv1-v2 ||^2 48 | 49 | Args: 50 | v1: (...,) Source vector 51 | v2: (...,) Target vector 52 | Returns: 53 | scale_fac (1,): Scale factor 54 | """ 55 | scale_fac = (v1 * v2).sum() / (v1 * v1).sum() 56 | if scale_fac < 0: 57 | scale_fac = torch.tensor([1.0], device=scale_fac.device) 58 | return scale_fac 59 | -------------------------------------------------------------------------------- /lab4d/utils/numpy_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import numpy as np 3 | 4 | 5 | def interp_wt(x, y, x2, type="linear"): 6 | """Map a scalar value from range [x0, x1] to [y0, y1] using interpolation 7 | 8 | Args: 9 | x: Input range [x0, x1] 10 | y: Output range [y0, y1] 11 | x2 (float): Scalar value in range [x0, x1] 12 | type (str): Interpolation type ("linear" or "log") 13 | Returns: 14 | y2 (float): Scalar value mapped to [y0, y1] 15 | """ 16 | # Extract values from tuples 17 | x0, x1 = x 18 | y0, y1 = y 19 | 20 | # # Check if x2 is in range 21 | # if x2 < x0 or x2 > x1: 22 | # raise ValueError("x2 must be in the range [x0, x1]") 23 | 24 | if type == "linear": 25 | # Perform linear interpolation 26 | y2 = y0 + (x2 - x0) * (y1 - y0) / (x1 - x0) 27 | 28 | elif type == "log": 29 | # Transform to log space 30 | log_y0 = np.log10(y0) 31 | log_y1 = np.log10(y1) 32 | 33 | # Perform linear interpolation in log space 34 | log_y2 = log_y0 + (x2 - x0) * (log_y1 - log_y0) / (x1 - x0) 35 | 36 | # Transform back to original space 37 | y2 = 10**log_y2 38 | 39 | else: 40 | raise ValueError("interpolation_type must be 'linear' or 'log'") 41 | 42 | y2 = np.clip(y2, np.min(y), np.max(y)) 43 | return y2 44 | 45 | 46 | def pca_numpy(raw_data, n_components): 47 | """Return a function that applies PCA to input data, based on the principal 48 | components of a raw data distribution. 49 | 50 | Args: 51 | raw_data (np.array): Raw data distribution, used to compute 52 | principal components. 
53 | n_components (int): Number of principal components to use 54 | Returns: 55 | apply_pca_fn (Function): A function that applies PCA to input data 56 | """ 57 | # center the data matrix by subtracting the mean of each feature 58 | mean = np.mean(raw_data, axis=0) 59 | centered_data = raw_data - mean 60 | 61 | # compute the covariance matrix of the centered data 62 | covariance_matrix = np.cov(centered_data.T) 63 | 64 | # compute the eigenvalues and eigenvectors of the covariance matrix 65 | eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix) 66 | 67 | # sort the eigenvalues in descending order and sort the eigenvectors accordingly 68 | sorted_indices = np.argsort(eigenvalues)[::-1] 69 | sorted_eigenvectors = eigenvectors[:, sorted_indices] 70 | 71 | # choose the top k eigenvectors (or all eigenvectors if k is not specified) 72 | top_eigenvectors = sorted_eigenvectors[:, :n_components] 73 | 74 | def apply_pca_fn(data, normalize=False): 75 | """ 76 | Args: 77 | data (np.array): Data to apply PCA to 78 | normalize (bool): If True, normalize the data to 0,1 for visualization 79 | """ 80 | shape = data.shape 81 | data = data.reshape(-1, shape[-1]) 82 | data = np.dot(data - mean, top_eigenvectors) 83 | 84 | if normalize: 85 | # scale to std = 1 86 | data = data / np.sqrt(eigenvalues[sorted_indices][:n_components]) 87 | data = np.clip(data, -2, 2) # clip to [-2, 2], 95.4% percentile 88 | # scale to 0,1 89 | data = (data + 2) / 4 90 | 91 | data = data.reshape(shape[:-1] + (n_components,)) 92 | return data 93 | 94 | return apply_pca_fn 95 | 96 | 97 | def bilinear_interp(feat, xy_loc): 98 | """Sample from a 2D feature map using bilinear interpolation 99 | 100 | Args: 101 | feat: (H,W,x) Input feature map 102 | xy_loc: (N,2) Coordinates to sample, float 103 | Returns: 104 | feat_samp: (N,x) Sampled features 105 | """ 106 | dtype = feat.dtype 107 | ul_loc = np.floor(xy_loc).astype(int) # x,y 108 | x = (xy_loc[:, 0] - ul_loc[:, 0])[:, None] # (N, 1) 109 | y = (xy_loc[:, 1] - ul_loc[:, 1])[:, None] # (N, 1) 110 | ul_loc = np.clip(ul_loc, 0, 110) # clip 111 | q11 = feat[ul_loc[:, 1], ul_loc[:, 0]] # (N, 16) 112 | q12 = feat[ul_loc[:, 1], ul_loc[:, 0] + 1] 113 | q21 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0]] 114 | q22 = feat[ul_loc[:, 1] + 1, ul_loc[:, 0] + 1] 115 | feat_samp = ( 116 | q11 * (1 - x) * (1 - y) 117 | + q21 * (1 - x) * (y - 0) 118 | + q12 * (x - 0) * (1 - y) 119 | + q22 * (x - 0) * (y - 0) 120 | ) 121 | feat_samp = feat_samp.astype(dtype) 122 | return feat_samp 123 | -------------------------------------------------------------------------------- /lab4d/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
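The function-returning pattern of pca_numpy above is easiest to see with a short sketch; the feature dimensions and random arrays below are placeholders, not values from the codebase.

import numpy as np

from lab4d.utils.numpy_utils import pca_numpy

# fit the projection on a reference feature distribution (e.g. flattened image features)
raw_feats = np.random.randn(10000, 16).astype(np.float32)
apply_pca = pca_numpy(raw_feats, n_components=3)

# project a (H, W, 16) feature map down to 3 channels for visualization
feat_map = np.random.randn(64, 64, 16).astype(np.float32)
feat_rgb = apply_pca(feat_map, normalize=True)  # (64, 64, 3), values in [0, 1]
print(feat_rgb.shape)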
2 | import torch 3 | 4 | @torch.enable_grad() 5 | def compute_gradient(fn, x): 6 | """ 7 | gradient of mlp params wrt pts 8 | """ 9 | x.requires_grad_(True) 10 | y = fn(x) 11 | 12 | # get gradient for each size-1 output 13 | gradients = [] 14 | for i in range(y.shape[-1]): 15 | y_sub = y[..., i : i + 1] 16 | d_output = torch.ones_like(y_sub, requires_grad=False, device=y.device) 17 | gradient = torch.autograd.grad( 18 | outputs=y_sub, 19 | inputs=x, 20 | grad_outputs=d_output, 21 | create_graph=True, 22 | retain_graph=True, 23 | only_inputs=True, 24 | )[0] 25 | gradients.append(gradient[..., None]) 26 | gradients = torch.cat(gradients, -1) # ...,input-dim, output-dim 27 | return gradients 28 | 29 | def frameid_to_vid(fid, frame_offset): 30 | """Given absolute frame ids [0, ..., N], compute the video id of each frame. 31 | 32 | Args: 33 | fid: (nframes,) Absolute frame ids 34 | e.g. [0, 1, 2, 3, 100, 101, 102, 103, 200, 201, 202, 203] 35 | frame_offset: (nvideos + 1,) Offset of each video 36 | e.g., [0, 100, 200, 300] 37 | Returns: 38 | vid: (nframes,) Maps idx to video id 39 | tid: (nframes,) Maps idx to relative frame id 40 | """ 41 | vid = torch.zeros_like(fid) 42 | for i in range(frame_offset.shape[0] - 1): 43 | assign = torch.logical_and(fid >= frame_offset[i], fid < frame_offset[i + 1]) 44 | vid[assign] = i 45 | return vid 46 | 47 | 48 | def remove_ddp_prefix(state_dict): 49 | """Remove distributed data parallel prefix from model checkpoint 50 | 51 | Args: 52 | state_dict (Dict): Model checkpoint 53 | Returns: 54 | new_state_dict (Dict): New model checkpoint 55 | """ 56 | new_state_dict = {} 57 | for key, value in state_dict.items(): 58 | if key.startswith("module."): 59 | new_key = key[7:] # Remove 'module.' prefix 60 | else: 61 | new_key = key 62 | new_state_dict[new_key] = value 63 | return new_state_dict 64 | 65 | 66 | def remove_state_startwith(state_dict, prefix): 67 | """Remove model parameters that start with a prefix 68 | 69 | Args: 70 | state_dict (Dict): Model checkpoint 71 | prefix (str): Prefix to filter 72 | Returns: 73 | new_state_dict (Dict): New model checkpoint 74 | """ 75 | new_state_dict = {} 76 | for key, value in state_dict.items(): 77 | if key.startswith(prefix): 78 | continue 79 | else: 80 | new_state_dict[key] = value 81 | return new_state_dict 82 | 83 | 84 | def remove_state_with(state_dict, string): 85 | """Remove model parameters that contain a string 86 | 87 | Args: 88 | state_dict (Dict): Model checkpoint 89 | string (str): String to filter 90 | Returns: 91 | new_state_dict (Dict): New model checkpoint 92 | """ 93 | new_state_dict = {} 94 | for key, value in state_dict.items(): 95 | if string in key: 96 | continue 97 | else: 98 | new_state_dict[key] = value 99 | return new_state_dict 100 | 101 | 102 | def compress_state_with(state_dict, string): 103 | """Initialize model parameters with the mean of the instance embedding if 104 | the parameter name contains a string 105 | 106 | Args: 107 | state_dict (Dict): Model checkpoint, modified in place 108 | string (str): String to filter 109 | """ 110 | # init with the mean of inst_embedding 111 | for key, value in state_dict.items(): 112 | if string in key: 113 | state_dict[key] = value.mean(dim=0, keepdim=True) 114 | -------------------------------------------------------------------------------- /lab4d/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
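The example values in the frameid_to_vid docstring above can be run directly; note that, as written, the function returns only the video id (the relative frame id mentioned in the docstring is not returned). A quick check:

import torch

from lab4d.utils.torch_utils import frameid_to_vid

# three videos of 100 frames each
fid = torch.tensor([0, 1, 2, 3, 100, 101, 102, 103, 200, 201, 202, 203])
frame_offset = torch.tensor([0, 100, 200, 300])

vid = frameid_to_vid(fid, frame_offset)
print(vid.tolist())  # [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]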
2 | from lab4d.utils.quat_transform import ( 3 | dual_quaternion_apply, 4 | dual_quaternion_inverse, 5 | dual_quaternion_to_quaternion_translation, 6 | ) 7 | 8 | 9 | def get_bone_coords(xyz, bone2obj): 10 | """Transform points from object canonical space to bone coordinates 11 | 12 | Args: 13 | xyz: (..., 3) Points in object canonical space 14 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3) 15 | transforms, written as dual quaternions 16 | Returns: 17 | xyz_bone: (..., B, 3) Points in bone space 18 | """ 19 | # transform xyz to bone space 20 | obj2bone = dual_quaternion_inverse(bone2obj) 21 | 22 | # reshape 23 | xyz = xyz[..., None, :].expand(xyz.shape[:-1] + (bone2obj[0].shape[-2], 3)).clone() 24 | xyz_bone = dual_quaternion_apply(obj2bone, xyz) 25 | return xyz_bone 26 | 27 | 28 | def get_xyz_bone_distance(xyz, bone2obj): 29 | """Compute squared distances from points to bone centers 30 | 31 | Argss: 32 | xyz: (..., 3) Points in object canonical space 33 | bone2obj: ((..., B, 4), (..., B, 4)) Bone-to-object SE(3) transforms, written as dual quaternions 34 | 35 | Returns: 36 | dist2: (..., B) Squared distance to each bone center 37 | """ 38 | _, center = dual_quaternion_to_quaternion_translation(bone2obj) 39 | dist2 = (xyz[..., None, :] - center).pow(2).sum(-1) # M, K 40 | return dist2 41 | -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/logo.png -------------------------------------------------------------------------------- /media/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/media/teaser.gif -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/__init__.py -------------------------------------------------------------------------------- /preprocess/libs/__init__.py: -------------------------------------------------------------------------------- 1 | # import lab4d 2 | import os 3 | import sys 4 | 5 | sys.path.insert( 6 | 0, 7 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 8 | ) 9 | 10 | sys.path.insert( 11 | 0, 12 | "%s/../" % os.path.join(os.path.dirname(__file__)), 13 | ) 14 | 15 | sys.path.insert( 16 | 0, 17 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)), 18 | ) 19 | -------------------------------------------------------------------------------- /preprocess/libs/geometry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
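A small sketch of get_xyz_bone_distance defined above, placing two bones at the object origin via identity dual quaternions. The (real, dual) tuple layout and the identity quaternion value below are assumptions about lab4d.utils.quat_transform, not verified against it; with a zero dual part the bone centers sit at the origin under either quaternion ordering convention.

import torch

from lab4d.utils.transforms import get_xyz_bone_distance

# two bones at the origin: real part = (assumed) identity rotation, dual part = zero translation
real = torch.tensor([[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]])  # (B=2, 4)
dual = torch.zeros(2, 4)                                           # (B=2, 4)
bone2obj = (real, dual)

xyz = torch.tensor([[0.0, 0.0, 0.0], [3.0, 4.0, 0.0]])  # two query points
dist2 = get_xyz_bone_distance(xyz, bone2obj)
print(dist2)  # tensor([[ 0.,  0.], [25., 25.]]) squared distance to each bone center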
2 | # taken from Rigidmask: https://github.com/gengshan-y/rigidmask/blob/b308b5082d09926e687c55001c20def6b0708021/utils/dydepth.py#L425 3 | import os 4 | import sys 5 | 6 | import cv2 7 | import numpy as np 8 | import trimesh 9 | 10 | from lab4d.utils.profile_utils import record_function 11 | 12 | sys.path.insert( 13 | 0, 14 | "%s/../third_party/vcnplus/" % os.path.join(os.path.dirname(__file__)), 15 | ) 16 | 17 | from flowutils.flowlib import warp_flow 18 | 19 | 20 | @record_function("compute_procrustes") 21 | def compute_procrustes_robust(pts0, pts1): 22 | """ 23 | analytical solution of R/t from correspondence 24 | pts0: N x 3 25 | pts1: N x 3 26 | """ 27 | num_samples = 2000 28 | min_samples = 10 29 | extent = (pts0.max(0) - pts0.min(0)).mean() 30 | threshold = extent * 0.05 31 | 32 | inliers = [] 33 | samples = [] 34 | idx_array = np.arange(pts0.shape[0]) 35 | for i in range(num_samples): 36 | sample = np.random.choice(idx_array, size=min_samples, replace=False) 37 | sol = compute_procrustes(pts0[sample], pts1[sample]) 38 | 39 | # evaluate inliers 40 | R, t = sol 41 | pts2 = R @ pts0.T + t[:, np.newaxis] 42 | dist = np.linalg.norm(pts2.T - pts1, 2, axis=1) 43 | inliers.append((dist < threshold).sum()) 44 | samples.append(sample) 45 | 46 | best_idx = np.argmax(np.sum(inliers, axis=0)) 47 | print("inlier_ratio: ", np.max(inliers) / pts0.shape[0]) 48 | best_sample = samples[best_idx] 49 | sol = compute_procrustes(pts0[best_sample], pts1[best_sample]) 50 | return sol 51 | 52 | 53 | @record_function("compute_procrustes") 54 | def compute_procrustes(pts0, pts1): 55 | """ 56 | analytical solution of R/t from correspondence 57 | pts0: N x 3 58 | pts1: N x 3 59 | """ 60 | if pts0.shape[0] < 10: 61 | print("Warning: too few points for procrustes. Return identity.") 62 | return np.eye(3), np.zeros(3) 63 | pts0_mean = np.mean(pts0, 0) 64 | pts1_mean = np.mean(pts1, 0) 65 | pts0_centered = pts0 - pts0_mean 66 | pts1_centered = pts1 - pts1_mean 67 | H = pts0_centered.T @ pts1_centered 68 | U, S, Vt = np.linalg.svd(H) 69 | R = Vt.T @ U.T 70 | if np.linalg.det(R) < 0: 71 | Vt[2, :] *= -1 72 | R = Vt.T @ U.T 73 | t = pts1_mean - R @ pts0_mean 74 | 75 | # pts2 = R @ pts0.T + t[:, np.newaxis] 76 | # pts2 = pts2.T 77 | # trimesh.Trimesh(pts0).export('tmp/0.obj') 78 | # trimesh.Trimesh(pts1).export('tmp/1.obj') 79 | # trimesh.Trimesh(pts2).export('tmp/2.obj') 80 | return R, t 81 | 82 | 83 | @record_function("two_frame_registration") 84 | def two_frame_registration( 85 | depth0, depth1, flow, K0, K1, mask, registration_type="procrustes" 86 | ): 87 | # prepare data 88 | shape = flow.shape[:2] 89 | x0, y0 = np.meshgrid(range(shape[1]), range(shape[0])) 90 | x0 = x0.astype(np.float32) 91 | y0 = y0.astype(np.float32) 92 | x1 = x0 + flow[:, :, 0] 93 | y1 = y0 + flow[:, :, 1] 94 | hp0 = np.stack((x0, y0, np.ones(x0.shape)), 0).reshape((3, -1)) 95 | hp1 = np.stack((x1, y1, np.ones(x0.shape)), 0).reshape((3, -1)) 96 | 97 | # use bg + valid pixels to compute R/t 98 | # valid_mask = np.logical_and(mask, flow[..., 2] > 0).flatten() 99 | valid_mask = mask.flatten() 100 | pts0 = np.linalg.inv(K0) @ hp0 * depth0.flatten() 101 | depth1_warped = warp_flow(depth1.astype(float), flow[..., :2]).flatten() 102 | pts1 = np.linalg.inv(K1) @ hp1 * depth1_warped 103 | 104 | if registration_type == "procrustes": 105 | # Procrustes 106 | valid_mask = np.logical_and(valid_mask, depth1_warped > 0) 107 | rmat, trans = compute_procrustes(pts0.T[valid_mask], pts1.T[valid_mask]) 108 | # rmat, trans = 
compute_procrustes_robust(pts0.T[valid_mask], pts1.T[valid_mask]) 109 | elif registration_type == "pnp": 110 | # PnP 111 | _, rvec, trans = cv2.solvePnP( 112 | pts0.T[valid_mask.flatten(), np.newaxis], 113 | hp1[:2].T[valid_mask.flatten(), np.newaxis], 114 | K0, 115 | 0, 116 | flags=cv2.SOLVEPNP_DLS, 117 | ) 118 | _, rvec, trans = cv2.solvePnP( 119 | pts0.T[valid_mask, np.newaxis], 120 | hp1[:2].T[valid_mask, np.newaxis], 121 | K0, 122 | 0, 123 | rvec, 124 | trans, 125 | useExtrinsicGuess=True, 126 | flags=cv2.SOLVEPNP_ITERATIVE, 127 | ) 128 | rmat = cv2.Rodrigues(rvec)[0] 129 | trans = trans[:, 0] 130 | else: 131 | raise NotImplementedError 132 | 133 | cam01 = np.eye(4) 134 | cam01[:3, :3] = rmat 135 | cam01[:3, 3] = trans 136 | return cam01 137 | -------------------------------------------------------------------------------- /preprocess/libs/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | import cv2 3 | import numpy as np 4 | from scipy.spatial.transform import Rotation as R 5 | 6 | from lab4d.utils.profile_utils import record_function 7 | 8 | 9 | @record_function("resize_to_target") 10 | def resize_to_target(flowfw, aspect_ratio=None, is_flow=False): 11 | h, w = flowfw.shape[:2] 12 | if aspect_ratio is None: 13 | factor = np.sqrt(250 * 1000 / (h * w)) 14 | th, tw = int(h * factor), int(w * factor) 15 | else: 16 | rh, rw = aspect_ratio[:2] 17 | factor = np.sqrt(250 * 1000 / (rh * rw)) 18 | th, tw = int(rh * factor), int(rw * factor) 19 | 20 | factor_h = th / h 21 | factor_w = tw / w 22 | 23 | flowfw_d = cv2.resize(flowfw, (tw, th)) 24 | 25 | if is_flow: 26 | flowfw_d[..., 0] *= factor_w 27 | flowfw_d[..., 1] *= factor_h 28 | return flowfw_d 29 | 30 | 31 | @record_function("reduce_component") 32 | def reduce_component(mask): 33 | dtype = mask.dtype 34 | nb_components, output, stats, centroids = cv2.connectedComponentsWithStats( 35 | mask.astype(np.uint8), connectivity=8 36 | ) 37 | if nb_components > 1: 38 | max_label, max_size = max( 39 | [(i, stats[i, cv2.CC_STAT_AREA]) for i in range(1, nb_components)], 40 | key=lambda x: x[1], 41 | ) 42 | mask = (output == max_label).astype(int) 43 | mask = mask.astype(dtype) 44 | return mask 45 | 46 | 47 | def robust_rot_align(rot1, rot2): 48 | """ 49 | align rot1 to rot2 using RANSAC 50 | """ 51 | in_thresh = 1.0 / 4 * np.pi # 45 deg 52 | n_samples = rot2.shape[0] 53 | rots = rot2[:, :3, :3] @ rot1[:, :3, :3].transpose(0, 2, 1) 54 | 55 | inliers = [] 56 | for i in range(n_samples): 57 | rots_aligned = rots[i : i + 1] @ rot1[:, :3, :3] 58 | dist = rots_aligned @ rot2[:, :3, :3].transpose(0, 2, 1) 59 | dist = R.from_matrix(dist).as_rotvec() 60 | dist = np.linalg.norm(dist, 2, axis=1) 61 | inliers.append((dist < in_thresh).sum()) 62 | 63 | # Convert rotation vectors back to rotation matrices 64 | best_rot = rots[np.argmax(inliers)] 65 | # print(inliers) 66 | return best_rot 67 | -------------------------------------------------------------------------------- /preprocess/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/scripts/__init__.py -------------------------------------------------------------------------------- /preprocess/scripts/camera_registration.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, 
Carnegie Mellon University. 2 | # python preprocess/scripts/camera_registration.py 2023-04-03-16-50-09-room-0000 0 3 | import glob 4 | import os 5 | import sys 6 | 7 | import cv2 8 | import numpy as np 9 | import trimesh 10 | 11 | sys.path.insert( 12 | 0, 13 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 14 | ) 15 | 16 | sys.path.insert( 17 | 0, 18 | "%s/../" % os.path.join(os.path.dirname(__file__)), 19 | ) 20 | 21 | from libs.geometry import two_frame_registration 22 | from libs.io import flow_process, read_raw 23 | from libs.utils import reduce_component 24 | 25 | from lab4d.utils.geom_utils import K2inv, K2mat 26 | from lab4d.utils.vis_utils import draw_cams 27 | 28 | 29 | def camera_registration(seqname, component_id): 30 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname 31 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir)) 32 | delta = 1 33 | crop_size = 256 34 | use_full = True 35 | registration_type = "procrustes" 36 | 37 | # get camera intrinsics 38 | raw_shape = cv2.imread(imglist[0]).shape[:2] 39 | max_l = max(raw_shape) 40 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2]) 41 | Kraw = K2mat(Kraw) 42 | 43 | cam_current = np.eye(4) # scene to camera: I, R01 I, R12 R01 I, ... 44 | cams = [cam_current] 45 | for im0idx in range(len(imglist)): 46 | if im0idx + delta >= len(imglist): 47 | continue 48 | # TODO: load croped images directly 49 | frameid0 = int(imglist[im0idx].split("/")[-1].split(".")[0]) 50 | frameid1 = int(imglist[im0idx + delta].split("/")[-1].split(".")[0]) 51 | # print("%s %d %d" % (seqname, frameid0, frameid1)) 52 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full) 53 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full) 54 | flow_process(data_dict0, data_dict1) 55 | 56 | # compute intrincs for the cropped images 57 | K0 = K2inv(data_dict0["crop2raw"]) @ Kraw 58 | K1 = K2inv(data_dict1["crop2raw"]) @ Kraw 59 | 60 | # get mask 61 | mask = data_dict0["mask"][..., 0].astype(int) == component_id 62 | if component_id > 0: 63 | # reduce the mask to the largest connected component 64 | mask = reduce_component(mask) 65 | else: 66 | # for background, additionally remove flow with low confidence 67 | mask = np.logical_and(mask, data_dict0["flow"][..., 2] > 0).flatten() 68 | cam_0_to_1 = two_frame_registration( 69 | data_dict0["depth"], 70 | data_dict1["depth"], 71 | data_dict0["flow"], 72 | K0, 73 | K1, 74 | mask, 75 | registration_type, 76 | ) 77 | cam_current = cam_0_to_1 @ cam_current 78 | cams.append(cam_current) 79 | 80 | os.makedirs(imgdir.replace("JPEGImages", "Cameras"), exist_ok=True) 81 | save_path = imgdir.replace("JPEGImages", "Cameras") 82 | # for idx, img_path in enumerate(sorted(glob.glob("%s/*.jpg" % imgdir))): 83 | # frameid = int(img_path.split("/")[-1].split(".")[0]) 84 | # campath = "%s/%05d-%02d.txt" % (save_path, frameid, component_id) 85 | # np.savetxt(campath, cams[idx]) 86 | np.save("%s/%02d.npy" % (save_path, component_id), cams) 87 | mesh_cam = draw_cams(cams) 88 | mesh_cam.export("%s/cameras-%02d.obj" % (save_path, component_id)) 89 | 90 | print("camera registration done: %s, %d" % (seqname, component_id)) 91 | 92 | 93 | if __name__ == "__main__": 94 | seqname = sys.argv[1] 95 | component_id = int(sys.argv[2]) # 0: bg, 1: fg 96 | 97 | camera_registration(seqname, component_id) 98 | -------------------------------------------------------------------------------- /preprocess/scripts/compute_diff.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/compute_diff.py database/processed/JPEGImages/Full-Resolution/cat-pikachu-0000/ database/processed/JPEGImages/Full-Resolution/2023-04-19-01-36-53-cat-pikachu-0000/ 3 | import glob 4 | import sys 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | path1 = sys.argv[1] 10 | path2 = sys.argv[2] 11 | 12 | for path1, path2 in zip( 13 | sorted(glob.glob(path1 + "/*")), sorted(glob.glob(path2 + "/*")) 14 | ): 15 | print(path1, path2) 16 | 17 | if path1.endswith(".npy"): 18 | t1 = np.load(path1).astype(np.float32) 19 | t2 = np.load(path2).astype(np.float32) 20 | elif path1.endswith(".jpg"): 21 | t1 = cv2.imread(path1).astype(np.float32) 22 | t2 = cv2.imread(path2).astype(np.float32) 23 | elif path1.endswith(".txt"): 24 | t1 = np.loadtxt(path1) 25 | t2 = np.loadtxt(path2) 26 | else: 27 | raise NotImplementedError 28 | 29 | print(np.mean(np.abs(t1 - t2))) 30 | -------------------------------------------------------------------------------- /preprocess/scripts/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/crop.py $seqname 256 1 3 | import glob 4 | import os 5 | import sys 6 | 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | sys.path.insert( 11 | 0, 12 | "%s/../" % os.path.join(os.path.dirname(__file__)), 13 | ) 14 | 15 | from libs.io import flow_process, read_raw 16 | 17 | 18 | def extract_crop(seqname, crop_size, use_full): 19 | if use_full: 20 | save_prefix = "full" 21 | else: 22 | save_prefix = "crop" 23 | save_prefix = "%s-%d" % (save_prefix, crop_size) 24 | 25 | delta_list = [1, 2, 4, 8] 26 | 27 | flowfw_list = {delta: [] for delta in delta_list} 28 | flowbw_list = {delta: [] for delta in delta_list} 29 | rgb_list = [] 30 | mask_list = [] 31 | depth_list = [] 32 | crop2raw_list = [] 33 | is_detected_list = [] 34 | 35 | imglist = sorted( 36 | glob.glob("database/processed/JPEGImages/Full-Resolution/%s/*.jpg" % seqname) 37 | ) 38 | for im0idx in tqdm(range(len(imglist))): 39 | for delta in delta_list: 40 | if im0idx % delta != 0: 41 | continue 42 | if im0idx + delta >= len(imglist): 43 | continue 44 | # print("%s %d %d" % (seqname, frameid0, frameid1)) 45 | data_dict0 = read_raw(imglist[im0idx], delta, crop_size, use_full) 46 | data_dict1 = read_raw(imglist[im0idx + delta], -delta, crop_size, use_full) 47 | flow_process(data_dict0, data_dict1) 48 | 49 | # save img, mask, vis2d 50 | if delta == 1: 51 | rgb_list.append(data_dict0["img"]) 52 | mask_list.append(data_dict0["mask"]) 53 | depth_list.append(data_dict0["depth"]) 54 | crop2raw_list.append(data_dict0["crop2raw"]) 55 | is_detected_list.append(data_dict0["is_detected"]) 56 | 57 | if im0idx == len(imglist) - 2: 58 | rgb_list.append(data_dict1["img"]) 59 | mask_list.append(data_dict1["mask"]) 60 | depth_list.append(data_dict1["depth"]) 61 | crop2raw_list.append(data_dict1["crop2raw"]) 62 | is_detected_list.append(data_dict1["is_detected"]) 63 | 64 | flowfw_list[delta].append(data_dict0["flow"]) 65 | flowbw_list[delta].append(data_dict1["flow"]) 66 | 67 | # save cropped data 68 | for delta in delta_list: 69 | if len(flowfw_list[delta]) == 0: 70 | continue 71 | np.save( 72 | "database/processed/FlowFW_%d/Full-Resolution/%s/%s.npy" 73 | % (delta, seqname, save_prefix), 74 | np.stack(flowfw_list[delta], 0), 75 | ) 76 | 
np.save( 77 | "database/processed/FlowBW_%d/Full-Resolution/%s/%s.npy" 78 | % (delta, seqname, save_prefix), 79 | np.stack(flowbw_list[delta], 0), 80 | ) 81 | 82 | np.save( 83 | "database/processed/JPEGImages/Full-Resolution/%s/%s.npy" 84 | % (seqname, save_prefix), 85 | np.stack(rgb_list, 0), 86 | ) 87 | np.save( 88 | "database/processed/Annotations/Full-Resolution/%s/%s.npy" 89 | % (seqname, save_prefix), 90 | np.stack(mask_list, 0), 91 | ) 92 | 93 | np.save( 94 | "database/processed/Depth/Full-Resolution/%s/%s.npy" % (seqname, save_prefix), 95 | np.stack(depth_list, 0), 96 | ) 97 | 98 | np.save( 99 | "database/processed/Annotations/Full-Resolution/%s/%s-crop2raw.npy" 100 | % (seqname, save_prefix), 101 | np.stack(crop2raw_list, 0), 102 | ) 103 | 104 | np.save( 105 | "database/processed/Annotations/Full-Resolution/%s/%s-is_detected.npy" 106 | % (seqname, save_prefix), 107 | np.stack(is_detected_list, 0), 108 | ) 109 | 110 | print("crop (size: %d, full: %d) done: %s" % (crop_size, use_full, seqname)) 111 | 112 | 113 | if __name__ == "__main__": 114 | seqname = sys.argv[1] 115 | crop_size = int(sys.argv[2]) 116 | use_full = bool(int(sys.argv[3])) 117 | 118 | extract_crop(seqname, crop_size, use_full) 119 | -------------------------------------------------------------------------------- /preprocess/scripts/depth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/depth.py 2023-03-30-21-20-57-cat-pikachu-5-0000 3 | import glob 4 | import os 5 | import sys 6 | 7 | import numpy as np 8 | import torch 9 | import trimesh 10 | from PIL import Image 11 | 12 | sys.path.insert( 13 | 0, 14 | "%s/../" % os.path.join(os.path.dirname(__file__)), 15 | ) 16 | 17 | 18 | from libs.utils import resize_to_target 19 | 20 | 21 | def depth2pts(depth): 22 | Kmat = np.eye(3) 23 | Kmat[0, 0] = depth.shape[0] 24 | Kmat[1, 1] = depth.shape[0] 25 | Kmat[0, 2] = depth.shape[1] / 2 26 | Kmat[1, 2] = depth.shape[0] / 2 27 | 28 | xy = np.meshgrid(np.arange(depth.shape[1]), np.arange(depth.shape[0])) 29 | hxy = np.stack( 30 | [xy[0].flatten(), xy[1].flatten(), np.ones_like(xy[0].flatten())], axis=0 31 | ) 32 | hxy = np.linalg.inv(Kmat) @ hxy 33 | xyz = hxy * depth.flatten() 34 | return xyz.T 35 | 36 | 37 | def extract_depth(seqname): 38 | image_dir = "database/processed/JPEGImages/Full-Resolution/%s/" % seqname 39 | output_dir = image_dir.replace("JPEGImages", "Depth") 40 | 41 | # torch.hub.help( 42 | # "intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True 43 | # ) # Triggers fresh download of MiDaS repo 44 | 45 | model_zoe_nk = torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True) 46 | zoe = model_zoe_nk.to("cuda") 47 | 48 | os.makedirs(output_dir, exist_ok=True) 49 | for img_path in sorted(glob.glob(f"{image_dir}/*.jpg")): 50 | # print(img_path) 51 | image = Image.open(img_path) 52 | depth = zoe.infer_pil(image) 53 | depth = resize_to_target(depth, is_flow=False).astype(np.float16) 54 | out_path = f"{output_dir}/{os.path.basename(img_path).replace('.jpg', '.npy')}" 55 | np.save(out_path, depth) 56 | # pts = depth2pts(depth) 57 | 58 | print("zoe depth done: ", seqname) 59 | 60 | 61 | if __name__ == "__main__": 62 | seqname = sys.argv[1] 63 | 64 | extract_depth(seqname) 65 | -------------------------------------------------------------------------------- /preprocess/scripts/download.py: -------------------------------------------------------------------------------- 1 | # Usage: 2 | 
# python preprocess/scripts/download.py 3 | import os, sys 4 | import shutil 5 | import subprocess 6 | import zipfile 7 | 8 | 9 | def download_seq(seqname): 10 | datadir = os.path.join("database", "raw", seqname) 11 | if os.path.exists(datadir): 12 | print(f"Deleting existing directory: {datadir}") 13 | shutil.rmtree(datadir) 14 | 15 | url_path = os.path.join("database", "vid_data", f"{seqname}.txt") 16 | if not os.path.exists(url_path): 17 | # specify the folder of videos 18 | print(f"URL file does not exist: {url_path}") 19 | # ask for user input 20 | vid_path = "video_folder" 21 | while not os.path.isdir(vid_path): 22 | vid_path = input("Enter the path to video folder:") 23 | # copy folder to datadir 24 | print(f"Copying from directory: {vid_path} to {datadir}") 25 | shutil.copytree(vid_path, datadir) 26 | else: 27 | with open(url_path, "r") as f: 28 | url = f.read().strip() 29 | 30 | # Download the video 31 | print(f"Downloading from URL: {url}") 32 | tmp_zip = "tmp-%s.zip" % seqname 33 | subprocess.run( 34 | ["wget", url, "-O", tmp_zip], 35 | stdout=subprocess.DEVNULL, 36 | stderr=subprocess.DEVNULL, 37 | ) 38 | 39 | # Unzip the file 40 | os.makedirs(datadir) 41 | print(f"Unzipping to directory: {datadir}") 42 | with zipfile.ZipFile(tmp_zip, "r") as zip_ref: 43 | zip_ref.extractall(datadir) 44 | 45 | # Remove the zip file 46 | os.remove(tmp_zip) 47 | 48 | 49 | def main(): 50 | # Get sequence name from command line arguments 51 | if len(sys.argv) > 1: 52 | seqname = sys.argv[1] 53 | download_seq(seqname) 54 | else: 55 | print("Usage: python preprocess/scripts/download.py ") 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /preprocess/scripts/extract_frames.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/extract_frames.py database/raw/cat-1015/10415567.mp4 tmp/ 3 | import sys 4 | 5 | import imageio 6 | import numpy as np 7 | 8 | 9 | def extract_frames(in_path, out_path): 10 | print("extracting frames: ", in_path) 11 | # Open the video file 12 | reader = imageio.get_reader(in_path) 13 | 14 | # Find the first non-black frame 15 | for i, im in enumerate(reader): 16 | if np.any(im > 0): 17 | start_frame = i 18 | break 19 | 20 | # Write the video starting from the first non-black frame 21 | count = 0 22 | for i, im in enumerate(reader): 23 | if i >= start_frame: 24 | imageio.imsave("%s/%05d.jpg" % (out_path, count), im) 25 | count += 1 26 | 27 | 28 | if __name__ == "__main__": 29 | in_path = sys.argv[1] 30 | out_path = sys.argv[2] 31 | extract_frames(in_path, out_path) 32 | -------------------------------------------------------------------------------- /preprocess/scripts/tsdf_fusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
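The intrinsics bookkeeping used in tsdf_fusion.py (below) and camera_registration.py (above), where Kraw is built from the raw image size and K2inv(crop2raw) @ Kraw recovers the intrinsics of the cropped frame, can be sketched as follows. The [fx, fy, px, py] ordering and the crop2raw scale/offset layout are assumptions inferred from how these helpers are called, not from their definitions.

import numpy as np

from lab4d.utils.geom_utils import K2inv, K2mat

# raw-image intrinsics, following the convention above: fx = fy = max(H, W), principal point at the center
raw_h, raw_w = 1080, 1920
Kraw = K2mat(np.array([1920.0, 1920.0, raw_w / 2.0, raw_h / 2.0]))  # assumed 3x3 output

# assumed crop2raw layout: [scale_x, scale_y, offset_x, offset_y] mapping crop pixels to raw pixels
crop2raw = np.array([raw_w / 256.0, raw_h / 256.0, 0.0, 0.0])
K_crop = K2inv(crop2raw) @ Kraw  # intrinsics of the 256x256 resized frame
print(K_crop)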
2 | # python preprocess/scripts/tsdf_fusion.py 2023-04-03-18-02-32-cat-pikachu-5-0000 0 3 | import glob 4 | import os 5 | import sys 6 | 7 | import cv2 8 | import numpy as np 9 | import trimesh 10 | 11 | sys.path.insert( 12 | 0, 13 | "%s/../third_party" % os.path.join(os.path.dirname(__file__)), 14 | ) 15 | 16 | 17 | sys.path.insert( 18 | 0, 19 | "%s/../" % os.path.join(os.path.dirname(__file__)), 20 | ) 21 | 22 | sys.path.insert( 23 | 0, 24 | "%s/../../" % os.path.join(os.path.dirname(__file__)), 25 | ) 26 | 27 | import fusion 28 | from libs.io import read_frame_data 29 | 30 | from lab4d.utils.geom_utils import K2inv, K2mat 31 | from lab4d.utils.vis_utils import draw_cams 32 | 33 | # def read_cam(imgpath, component_id): 34 | # campath = imgpath.replace("JPEGImages", "Cameras").replace( 35 | # ".jpg", "-%02d.txt" % component_id 36 | # ) 37 | # scene2cam = np.loadtxt(campath) 38 | # cam2scene = np.linalg.inv(scene2cam) 39 | # return cam2scene 40 | 41 | 42 | def tsdf_fusion(seqname, component_id, crop_size=256, use_full=True): 43 | # load rgb/depth 44 | imgdir = "database/processed/JPEGImages/Full-Resolution/%s" % seqname 45 | imglist = sorted(glob.glob("%s/*.jpg" % imgdir)) 46 | 47 | # camera path 48 | save_path = imgdir.replace("JPEGImages", "Cameras") 49 | save_path = "%s/%02d.npy" % (save_path, component_id) 50 | cams_prev = np.load(save_path) 51 | 52 | # get camera intrinsics 53 | raw_shape = cv2.imread(imglist[0]).shape[:2] 54 | max_l = max(raw_shape) 55 | Kraw = np.array([max_l, max_l, raw_shape[1] / 2, raw_shape[0] / 2]) 56 | Kraw = K2mat(Kraw) 57 | 58 | # initialize volume 59 | vol_bnds = np.zeros((3, 2)) 60 | for it, imgpath in enumerate(imglist[:-1]): 61 | rgb, depth, mask, crop2raw = read_frame_data( 62 | imgpath, crop_size, use_full, component_id 63 | ) 64 | K0 = K2inv(crop2raw) @ Kraw 65 | # cam2scene = read_cam(imgpath, component_id) 66 | cam2scene = np.linalg.inv(cams_prev[it]) 67 | depth[~mask] = 0 68 | depth[depth > 10] = 0 69 | view_frust_pts = fusion.get_view_frustum(depth, K0, cam2scene) 70 | vol_bnds[:, 0] = np.minimum(vol_bnds[:, 0], np.amin(view_frust_pts, axis=1)) 71 | vol_bnds[:, 1] = np.maximum(vol_bnds[:, 1], np.amax(view_frust_pts, axis=1)) 72 | tsdf_vol = fusion.TSDFVolume(vol_bnds, voxel_size=0.2, use_gpu=False) 73 | 74 | # fusion 75 | for it, imgpath in enumerate(imglist[:-1]): 76 | # print(imgpath) 77 | rgb, depth, mask, crop2raw = read_frame_data( 78 | imgpath, crop_size, use_full, component_id 79 | ) 80 | K0 = K2inv(crop2raw) @ Kraw 81 | depth[~mask] = 0 82 | # cam2scene = read_cam(imgpath, component_id) 83 | cam2scene = np.linalg.inv(cams_prev[it]) 84 | tsdf_vol.integrate(rgb, depth, K0, cam2scene, obs_weight=1.0) 85 | 86 | save_path = imgdir.replace("JPEGImages", "Cameras") 87 | # get mesh, compute center 88 | rt = tsdf_vol.get_mesh() 89 | verts, faces = rt[0], rt[1] 90 | mesh = trimesh.Trimesh(verts, faces) 91 | aabb = mesh.bounds 92 | center = aabb.mean(0) 93 | mesh.vertices = mesh.vertices - center[None] 94 | mesh.export("%s/mesh-%02d-centered.obj" % (save_path, component_id)) 95 | 96 | # save cameras 97 | cams = [] 98 | for it, imgpath in enumerate(imglist): 99 | # campath = imgpath.replace("JPEGImages", "Cameras").replace( 100 | # ".jpg", "-%02d.txt" % component_id 101 | # ) 102 | # cam = np.loadtxt(campath) 103 | # shift the camera in the scene space 104 | cam = np.linalg.inv(cams_prev[it]) 105 | cam[:3, 3] -= center 106 | cam = np.linalg.inv(cam) 107 | # np.savetxt(campath, cam) 108 | cams.append(cam) 109 | np.save("%s/%02d.npy" % (save_path, 
component_id), cams) 110 | mesh_cam = draw_cams(cams) 111 | mesh_cam.export("%s/cameras-%02d-centered.obj" % (save_path, component_id)) 112 | 113 | print("tsdf fusion done: %s, %d" % (seqname, component_id)) 114 | 115 | 116 | if __name__ == "__main__": 117 | seqname = sys.argv[1] 118 | component_id = int(sys.argv[2]) 119 | 120 | tsdf_fusion(seqname, component_id) 121 | -------------------------------------------------------------------------------- /preprocess/scripts/write_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python preprocess/scripts/write_config.py ${vidname} 3 | import configparser 4 | import glob 5 | import os 6 | import sys 7 | 8 | import cv2 9 | 10 | 11 | def write_config(collection_name): 12 | min_nframe = 8 13 | imgroot = "database/processed/JPEGImages/Full-Resolution/" 14 | 15 | config = configparser.ConfigParser() 16 | config["data"] = { 17 | "init_frame": "0", 18 | "end_frame": "-1", 19 | } 20 | 21 | seqname_all = sorted( 22 | glob.glob("%s/%s-[0-9][0-9][0-9][0-9]*" % (imgroot, collection_name)) 23 | ) 24 | total_vid = 0 25 | for i, seqname in enumerate(seqname_all): 26 | seqname = seqname.split("/")[-1] 27 | img = cv2.imread("%s/%s/00000.jpg" % (imgroot, seqname), 0) 28 | num_fr = len(glob.glob("%s/%s/*.jpg" % (imgroot, seqname))) 29 | if num_fr < min_nframe: 30 | continue 31 | 32 | fl = max(img.shape) 33 | px = img.shape[1] // 2 34 | py = img.shape[0] // 2 35 | camtxt = [fl, fl, px, py] 36 | config["data_%d" % total_vid] = { 37 | "ks": " ".join([str(i) for i in camtxt]), 38 | "shape": " ".join([str(img.shape[0]), str(img.shape[1])]), 39 | "img_path": "database/processed/JPEGImages/Full-Resolution/%s/" % seqname, 40 | } 41 | total_vid += 1 42 | 43 | os.makedirs("database/configs", exist_ok=True) 44 | with open("database/configs/%s.config" % collection_name, "w") as configfile: 45 | config.write(configfile) 46 | 47 | 48 | if __name__ == "__main__": 49 | collection_name = sys.argv[1] 50 | 51 | write_config(collection_name) 52 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/compute_flow.sh: -------------------------------------------------------------------------------- 1 | # bash compute_flow.sh $seqname 2 | seqname=$1 3 | 4 | if [[ $seqname ]]; 5 | then 6 | array=(1 2 4 8) 7 | for i in "${array[@]}" 8 | do 9 | python compute_flow.py --datapath ../../../database/processed/JPEGImages/Full-Resolution/$seqname/ --loadmodel ./vcn_rob.pth --dframe $i 10 | done 11 | fi 12 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/flowutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/flowutils/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/frame_filter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 
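For reference, the .config files produced by write_config.py above can be read back with configparser; a minimal sketch (the collection name below is just an example):

import configparser

config = configparser.ConfigParser()
config.read("database/configs/cat-pikachu.config")  # written by write_config.py

for section in config.sections():
    if not section.startswith("data_"):
        continue  # skip the global "data" section
    fx, fy, px, py = map(float, config[section]["ks"].split())
    height, width = map(int, config[section]["shape"].split())
    print(section, config[section]["img_path"], (fx, fy, px, py), (height, width))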
2 | import sys 3 | import os 4 | 5 | # insert path of current file 6 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 7 | 8 | import cv2 9 | import pdb 10 | import argparse 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.parallel 15 | import torch.backends.cudnn as cudnn 16 | import torch.utils.data 17 | import glob 18 | import shutil 19 | 20 | from models.VCNplus import VCN 21 | from models.inference import ( 22 | load_eval_checkpoint, 23 | modify_flow_module, 24 | process_flow_input, 25 | make_disc_aux, 26 | ) 27 | 28 | cudnn.benchmark = True 29 | 30 | 31 | def frame_filter(seqname, outdir): 32 | print("Filtering frames for %s" % (seqname)) 33 | model_path = "./preprocess/third_party/vcnplus/vcn_rob.pth" 34 | maxdisp = 256 # maxium disparity. Only affect the coarsest cost volume size 35 | fac = ( 36 | 1 # controls the shape of search grid. Only affect the coarse cost volume size 37 | ) 38 | flow_threshold = 0.05 # flow threshold that controls frame skipping 39 | max_frames = 500 # maximum number of frames to keep (to avoid oom in tracking etc.) 40 | 41 | # construct model 42 | model = load_eval_checkpoint(model_path, maxdisp=maxdisp, fac=fac) 43 | 44 | # input and output images 45 | img_paths = sorted( 46 | glob.glob("%s/JPEGImagesRaw/Full-Resolution/%s/*.jpg" % (outdir, seqname)) 47 | ) 48 | output_path = "%s/JPEGImages/Full-Resolution/%s/" % (outdir, seqname) 49 | output_idxs = [] 50 | 51 | # load image 0 and compute resize ratio 52 | img0_o = cv2.imread(img_paths[0])[:, :, ::-1] 53 | output_idxs.append(0) 54 | 55 | input_size = img0_o.shape 56 | inp_h, inp_w, _ = input_size 57 | max_res = 300 * 300 58 | res_fac = np.sqrt(max_res / (inp_h * inp_w)) 59 | max_h = int(np.ceil(inp_h * res_fac / 64) * 64) 60 | max_w = int(np.ceil(inp_w * res_fac / 64) * 64) 61 | 62 | # modify flow module according to input size 63 | modify_flow_module(model, max_h, max_w) 64 | model.eval() 65 | 66 | # find adjacent frames with sufficiently large flow 67 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w) 68 | for jnx in range(1, len(img_paths)): 69 | img1_o = cv2.imread(img_paths[jnx])[:, :, ::-1] 70 | img1, img1_noaug = process_flow_input(img1_o, model.mean_R, max_h, max_w) 71 | 72 | # forward inference 73 | disc_aux = make_disc_aux(img0_noaug, max_h, max_w, input_size) 74 | with torch.no_grad(): 75 | img01 = torch.cat([img0, img1], dim=0) 76 | flowfw, _, _, _ = model(img01, disc_aux) # 1, 2, max_h, max_w 77 | 78 | flowfw[:, 0:1] /= max_w 79 | flowfw[:, 1:2] /= max_h 80 | 81 | maxflow = torch.max(torch.norm(flowfw[0], p=2, dim=0)).item() 82 | # print(jnx, "%.06f" % (maxflow)) 83 | 84 | if maxflow > flow_threshold: 85 | output_idxs.append(jnx) 86 | img0_o = img1_o 87 | img0, img0_noaug = process_flow_input(img0_o, model.mean_L, max_h, max_w) 88 | 89 | if len(output_idxs) >= max_frames: 90 | break 91 | 92 | # copy selected frames to output 93 | if len(output_idxs) > 8: 94 | os.system("mkdir -p %s" % (output_path)) 95 | for output_file in [f"{jnx:05d}.jpg" for jnx in output_idxs]: 96 | shutil.copy2( 97 | f"{outdir}/JPEGImagesRaw/Full-Resolution/{seqname}/{output_file}", 98 | output_path, 99 | ) 100 | 101 | print("frame filtering done: %s" % seqname) 102 | else: 103 | print("lack of motion, ignored: %s" % seqname) 104 | 105 | 106 | if __name__ == "__main__": 107 | if len(sys.argv) != 3: 108 | print(f"Usage: python {sys.argv[0]} ") 109 | print(f"Example: python {sys.argv[0]} cat-pikachu-0-0000 'database/processed/'") 110 | exit() 
111 | seqname = sys.argv[1] 112 | outdir = sys.argv[2] 113 | frame_filter(seqname, outdir) 114 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/vcnplus/models/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/det.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv,num_input): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. 
{}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/det_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/.gitignore: -------------------------------------------------------------------------------- 1 | DCNv2/build 2 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn_v2 import * 2 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | // modified from the CUDA version for CPU use by Daniel K. Suhendro 64 | 65 | #ifndef DCN_V2_IM2COL_CPU 66 | #define DCN_V2_IM2COL_CPU 67 | 68 | #ifdef __cplusplus 69 | extern "C" 70 | { 71 | #endif 72 | 73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 88 | const int batch_size, const int channels, const int height_im, const int width_im, 89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 91 | const int dilation_h, const int dilation_w, 92 | const int deformable_group, 93 | float *grad_offset, float *grad_mask); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int 
kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/DCN/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | 
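For orientation, the vision.cpp bindings above expose the DCNv2 operators through a compiled extension module named _ext (see the DCNv2 setup.py below, which names the extension and selects the CPU or CUDA sources). The following is only a rough smoke-test sketch, not part of the repository: the argument order mirrors the dcn_v2_cpu_forward / dcn_v2_cuda_forward declarations in the vision.h headers, and the tensor shapes and the dispatch behaviour of dcn_v2_forward are assumptions; in practice the operators are consumed through the wrappers re-exported by DCN/__init__.py.

import torch
import _ext  # compiled DCNv2 extension; assumes ./make.sh (python setup.py build develop) has been run

N, C_in, C_out, K = 1, 8, 16, 3  # batch size, in/out channels, kernel size (assumed values)
H = W = 32
deformable_group = 1

x = torch.randn(N, C_in, H, W).cuda()
weight = torch.randn(C_out, C_in, K, K).cuda()
bias = torch.zeros(C_out).cuda()
# offset holds 2*K*K*deformable_group channels (x/y shift per kernel tap);
# mask holds K*K*deformable_group modulation weights
offset = torch.zeros(N, 2 * K * K * deformable_group, H, W).cuda()
mask = torch.ones(N, K * K * deformable_group, H, W).cuda()

out = _ext.dcn_v2_forward(
    x, weight, bias, offset, mask,
    K, K,  # kernel_h, kernel_w
    1, 1,  # stride_h, stride_w
    1, 1,  # pad_h, pad_w
    1, 1,  # dilation_h, dilation_w
    deformable_group,
)
print(out.shape)  # (1, 16, 32, 32)

With zero offsets and all-ones masks, modulated deformable convolution degenerates to a standard 3x3 convolution, which makes this a convenient first check that the extension built correctly.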
-------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.X 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python testcpu.py # run examples and gradient check on cpu 7 | python testcuda.py # run examples and gradient check on gpu 8 | ``` 9 | ### Note 10 | Now the master branch is for pytorch 1.x, you can switch back to pytorch 0.4 with, 11 | ```bash 12 | git checkout pytorch_0.4 13 | ``` 14 | 15 | ### Known Issues: 16 | 17 | - [x] Gradient check w.r.t offset (solved) 18 | - [ ] Backward is not reentrant (minor) 19 | 20 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 21 | 22 | Update: all gradient check passes with **double** precision. 23 | 24 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 25 | float `<1e-15` for double), 26 | so it may not be a serious problem (?) 27 | 28 | Please post an issue or PR if you have any comments. 
29 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /preprocess/third_party/vcnplus/models/networks/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | def get_extensions(): 19 | this_dir = os.path.dirname(os.path.abspath(__file__)) 20 | extensions_dir = os.path.join(this_dir, "DCN", "src") 21 | 22 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 23 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 24 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 25 | 26 | #os.environ["CC"] = "g++" 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | extra_compile_args = {'cxx': ['-std=c++14']} 30 | define_macros = [] 31 | 32 | 33 | #if torch.cuda.is_available() and CUDA_HOME is not None: 34 | if torch.cuda.is_available(): 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | else: 45 | #raise NotImplementedError('Cuda is not available') 46 | pass 47 | 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | include_dirs = [extensions_dir] 51 | ext_modules = [ 52 | extension( 53 | "_ext", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | return ext_modules 61 | 62 | setup( 63 | name="DCNv2", 64 | version="0.1", 65 | author="charlesshang", 66 | url="https://github.com/charlesshang/DCNv2", 67 | description="deformable convolutional networks", 68 | packages=find_packages(exclude=("configs", "tests",)), 69 | # install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lab4d-org/lab4d/e62976600bcc4b6f4f08cf059701c1c603c9c34a/preprocess/third_party/viewpoint/__init__.py -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml" 2 | MODEL: 3 | ROI_DENSEPOSE_HEAD: 4 | CSE: 5 | EMBEDDERS: 6 | "smpl_27554": 7 | TYPE: vertex_feature 8 | NUM_VERTICES: 27554 9 | FEATURE_DIM: 256 10 | FEATURES_TRAINABLE: False 11 | IS_TRAINABLE: True 12 | INIT_FILE: 
"https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_smpl_27554_256.pkl" 13 | DATASETS: 14 | TRAIN: 15 | - "densepose_coco_2014_train_cse" 16 | - "densepose_coco_2014_valminusminival_cse" 17 | TEST: 18 | - "densepose_coco_2014_minival_cse" 19 | CLASS_TO_MESH_NAME_MAPPING: 20 | "0": "smpl_27554" 21 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/Base-DensePose-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | VERSION: 2 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | ANCHOR_GENERATOR: 11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 13 | RPN: 14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 17 | # Detectron1 uses 2000 proposals per-batch, 18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 20 | POST_NMS_TOPK_TRAIN: 1000 21 | POST_NMS_TOPK_TEST: 1000 22 | 23 | DENSEPOSE_ON: True 24 | ROI_HEADS: 25 | NAME: "DensePoseROIHeads" 26 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 27 | NUM_CLASSES: 1 28 | ROI_BOX_HEAD: 29 | NAME: "FastRCNNConvFCHead" 30 | NUM_FC: 2 31 | POOLER_RESOLUTION: 7 32 | POOLER_SAMPLING_RATIO: 2 33 | POOLER_TYPE: "ROIAlign" 34 | ROI_DENSEPOSE_HEAD: 35 | NAME: "DensePoseV1ConvXHead" 36 | POOLER_TYPE: "ROIAlign" 37 | NUM_COARSE_SEGM_CHANNELS: 2 38 | PREDICTOR_NAME: "DensePoseEmbeddingPredictor" 39 | LOSS_NAME: "DensePoseCseLoss" 40 | CSE: 41 | # embedding loss, possible values: 42 | # - "EmbeddingLoss" 43 | # - "SoftEmbeddingLoss" 44 | EMBED_LOSS_NAME: "EmbeddingLoss" 45 | SOLVER: 46 | IMS_PER_BATCH: 16 47 | BASE_LR: 0.01 48 | STEPS: (60000, 80000) 49 | MAX_ITER: 90000 50 | WARMUP_FACTOR: 0.1 51 | CLIP_GRADIENTS: 52 | CLIP_TYPE: norm 53 | CLIP_VALUE: 1.0 54 | ENABLED: true 55 | NORM_TYPE: 2.0 56 | INPUT: 57 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 58 | DENSEPOSE_EVALUATION: 59 | TYPE: cse 60 | STORAGE: file 61 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | ROI_DENSEPOSE_HEAD: 7 | NAME: "DensePoseDeepLabHead" 8 | CSE: 9 | EMBED_LOSS_NAME: "SoftEmbeddingLoss" 10 | SOLVER: 11 | MAX_ITER: 130000 12 | STEPS: (100000, 120000) 13 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DensePose-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | ROI_HEADS: 7 | NUM_CLASSES: 1 8 | ROI_DENSEPOSE_HEAD: 9 | NAME: "DensePoseV1ConvXHead" 
10 | COARSE_SEGM_TRAINED_BY_MASKS: True 11 | CSE: 12 | EMBED_LOSS_NAME: "SoftEmbeddingLoss" 13 | EMBEDDING_DIST_GAUSS_SIGMA: 0.1 14 | GEODESIC_DIST_GAUSS_SIGMA: 0.1 15 | EMBEDDERS: 16 | "cat_5001": 17 | TYPE: vertex_feature 18 | NUM_VERTICES: 5001 19 | FEATURE_DIM: 256 20 | FEATURES_TRAINABLE: False 21 | IS_TRAINABLE: True 22 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl" 23 | "dog_5002": 24 | TYPE: vertex_feature 25 | NUM_VERTICES: 5002 26 | FEATURE_DIM: 256 27 | FEATURES_TRAINABLE: False 28 | IS_TRAINABLE: True 29 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl" 30 | "sheep_5004": 31 | TYPE: vertex_feature 32 | NUM_VERTICES: 5004 33 | FEATURE_DIM: 256 34 | FEATURES_TRAINABLE: False 35 | IS_TRAINABLE: True 36 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" 37 | "horse_5004": 38 | TYPE: vertex_feature 39 | NUM_VERTICES: 5004 40 | FEATURE_DIM: 256 41 | FEATURES_TRAINABLE: False 42 | IS_TRAINABLE: True 43 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" 44 | "zebra_5002": 45 | TYPE: vertex_feature 46 | NUM_VERTICES: 5002 47 | FEATURE_DIM: 256 48 | FEATURES_TRAINABLE: False 49 | IS_TRAINABLE: True 50 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" 51 | "giraffe_5002": 52 | TYPE: vertex_feature 53 | NUM_VERTICES: 5002 54 | FEATURE_DIM: 256 55 | FEATURES_TRAINABLE: False 56 | IS_TRAINABLE: True 57 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" 58 | "elephant_5002": 59 | TYPE: vertex_feature 60 | NUM_VERTICES: 5002 61 | FEATURE_DIM: 256 62 | FEATURES_TRAINABLE: False 63 | IS_TRAINABLE: True 64 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" 65 | "cow_5002": 66 | TYPE: vertex_feature 67 | NUM_VERTICES: 5002 68 | FEATURE_DIM: 256 69 | FEATURES_TRAINABLE: False 70 | IS_TRAINABLE: True 71 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" 72 | "bear_4936": 73 | TYPE: vertex_feature 74 | NUM_VERTICES: 4936 75 | FEATURE_DIM: 256 76 | FEATURES_TRAINABLE: False 77 | IS_TRAINABLE: True 78 | INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" 79 | DATASETS: 80 | TRAIN: 81 | - "densepose_lvis_v1_ds1_train_v1" 82 | TEST: 83 | - "densepose_lvis_v1_ds1_val_v1" 84 | WHITELISTED_CATEGORIES: 85 | "densepose_lvis_v1_ds1_train_v1": 86 | - 943 # sheep 87 | - 1202 # zebra 88 | - 569 # horse 89 | - 496 # giraffe 90 | - 422 # elephant 91 | - 80 # cow 92 | - 76 # bear 93 | - 225 # cat 94 | - 378 # dog 95 | "densepose_lvis_v1_ds1_val_v1": 96 | - 943 # sheep 97 | - 1202 # zebra 98 | - 569 # horse 99 | - 496 # giraffe 100 | - 422 # elephant 101 | - 80 # cow 102 | - 76 # bear 103 | - 225 # cat 104 | - 378 # dog 105 | CATEGORY_MAPS: 106 | "densepose_lvis_v1_ds1_train_v1": 107 | "1202": 943 # zebra -> sheep 108 | "569": 943 # horse -> sheep 109 | "496": 943 # giraffe -> sheep 110 | "422": 943 # elephant -> sheep 111 | "80": 943 # cow -> sheep 112 | "76": 943 # bear -> sheep 113 | "225": 943 # cat -> sheep 114 | "378": 943 # dog -> sheep 115 | "densepose_lvis_v1_ds1_val_v1": 116 | "1202": 943 # zebra -> sheep 117 | "569": 943 # horse -> sheep 118 | "496": 943 # giraffe -> sheep 119 | "422": 943 # elephant -> sheep 120 | "80": 943 # cow -> sheep 121 | "76": 943 # bear -> sheep 122 | "225": 943 # cat -> sheep 123 | "378": 943 # dog -> sheep 124 | 
CLASS_TO_MESH_NAME_MAPPING: 125 | # Note: different classes are mapped to a single class 126 | # mesh is chosen based on GT data, so this is just some 127 | # value which has no particular meaning 128 | "0": "sheep_5004" 129 | SOLVER: 130 | MAX_ITER: 4000 131 | STEPS: (3000, 3500) 132 | DENSEPOSE_EVALUATION: 133 | EVALUATE_MESH_ALIGNMENT: True 134 | -------------------------------------------------------------------------------- /preprocess/third_party/viewpoint/cselib.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. 3 | import cv2 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | import pdb 8 | 9 | from detectron2.config import get_cfg 10 | from detectron2.modeling import build_model 11 | from detectron2.checkpoint import DetectionCheckpointer 12 | from detectron2.structures import Boxes as create_boxes 13 | 14 | import sys 15 | 16 | sys.path.insert(0, "preprocess/third_party/detectron2/projects/DensePose/") 17 | from densepose import add_densepose_config 18 | 19 | 20 | # load model 21 | def create_cse(is_human): 22 | if is_human: 23 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml" 24 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x/250713061/model_final_1d3314.pkl" 25 | else: 26 | dp_config_path = "preprocess/third_party/viewpoint/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml" 27 | dp_weight_path = "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k/253498611/model_final_6d69b7.pkl" 28 | 29 | cfg = get_cfg() 30 | add_densepose_config(cfg) 31 | cfg.merge_from_file(dp_config_path) 32 | cfg.MODEL.WEIGHTS = dp_weight_path 33 | model = build_model(cfg) # returns a torch.nn.Module 34 | DetectionCheckpointer(model).load( 35 | cfg.MODEL.WEIGHTS 36 | ) # load a file, usually from cfg.MODEL.WEIGHTS 37 | return model 38 | 39 | 40 | def preprocess_image(image, mask): 41 | h, w, _ = image.shape 42 | 43 | # resize 44 | max_size = 1333 45 | if h > w: 46 | h_rszd, w_rszd = max_size, max_size * w // h 47 | else: 48 | h_rszd, w_rszd = max_size * h // w, max_size 49 | image = cv2.resize(image, (w_rszd, h_rszd)) 50 | mask = cv2.resize(mask.astype(float), (w_rszd, h_rszd)).astype(np.uint8) 51 | 52 | # pad 53 | h_pad = (1 + h_rszd // 32) * 32 54 | w_pad = (1 + w_rszd // 32) * 32 55 | image_tmp = np.zeros((h_pad, w_pad, 3)).astype(np.uint8) 56 | mask_tmp = np.zeros((h_pad, w_pad)).astype(np.uint8) 57 | image_tmp[:h_rszd, :w_rszd] = image 58 | mask_tmp[:h_rszd, :w_rszd] = mask 59 | image = image_tmp 60 | mask = mask_tmp 61 | 62 | # preprocess image and box 63 | indices = np.where(mask > 0) 64 | xid = indices[1] 65 | yid = indices[0] 66 | center = ((xid.max() + xid.min()) // 2, (yid.max() + yid.min()) // 2) 67 | length = ( 68 | int((xid.max() - xid.min()) * 1.0 // 2), 69 | int((yid.max() - yid.min()) * 1.0 // 2), 70 | ) 71 | bbox = [center[0] - length[0], center[1] - length[1], length[0] * 2, length[1] * 2] 72 | bbox = [ 73 | max(0, bbox[0]), 74 | max(0, bbox[1]), 75 | min(w_pad, bbox[0] + bbox[2]), 76 | min(h_pad, bbox[1] + bbox[3]), 77 | ] 78 | bbox_raw = bbox.copy() # bbox in the raw image coordinate 79 | bbox_raw[0] *= w / w_rszd 80 | bbox_raw[2] *= w / w_rszd 81 | bbox_raw[1] *= h / h_rszd 82 | bbox_raw[3] *= h / h_rszd 83 
| return image, mask, bbox, bbox_raw 84 | 85 | 86 | def run_cse(model, image, mask): 87 | image, mask, bbox, bbox_raw = preprocess_image(image, mask) 88 | 89 | image = torch.Tensor(image).cuda().permute(2, 0, 1)[None] 90 | image = torch.stack([(x - model.pixel_mean) / model.pixel_std for x in image]) 91 | pred_boxes = torch.Tensor([bbox]).cuda() 92 | pred_boxes = create_boxes(pred_boxes) 93 | 94 | # inference 95 | model.eval() 96 | with torch.no_grad(): 97 | features = model.backbone(image) 98 | features = [features[f] for f in model.roi_heads.in_features] 99 | features = [model.roi_heads.decoder(features)] 100 | features_dp = model.roi_heads.densepose_pooler(features, [pred_boxes]) 101 | densepose_head_outputs = model.roi_heads.densepose_head(features_dp) 102 | densepose_predictor_outputs = model.roi_heads.densepose_predictor( 103 | densepose_head_outputs 104 | ) 105 | coarse_segm_resized = densepose_predictor_outputs.coarse_segm[0] 106 | embedding_resized = densepose_predictor_outputs.embedding[0] 107 | 108 | # use input mask 109 | x, y, xx, yy = bbox 110 | mask_box = mask[y:yy, x:xx] 111 | mask_box = torch.Tensor(mask_box).cuda()[None, None] 112 | mask_box = ( 113 | F.interpolate(mask_box, coarse_segm_resized.shape[1:3], mode="bilinear")[0, 0] 114 | > 0 115 | ) 116 | 117 | # output embedding 118 | embedding = embedding_resized # size does not matter for a image code 119 | embedding = embedding * mask_box.float()[None] 120 | 121 | # output dp2raw 122 | bbox_raw = np.asarray(bbox_raw) 123 | dp2raw = np.concatenate( 124 | [(bbox_raw[2:] - bbox_raw[:2]) / embedding.shape[1], bbox_raw[:2]] 125 | ) 126 | return embedding, dp2raw 127 | -------------------------------------------------------------------------------- /scripts/create_collage.py: -------------------------------------------------------------------------------- 1 | # python scripts/create_collage.py --testdir logdir/penguin-fg-skel-b120/ --prefix renderings_0002 2 | 3 | from moviepy.editor import clips_array, VideoFileClip, vfx 4 | import sys, os 5 | import numpy as np 6 | import pdb 7 | import glob 8 | import argparse 9 | import itertools 10 | 11 | parser = argparse.ArgumentParser(description="combine results into a collage") 12 | parser.add_argument("--testdir", default="", help="path to test dir") 13 | parser.add_argument( 14 | "--prefix", default="renderings_ref_", type=str, help="what data to combine" 15 | ) 16 | args = parser.parse_args() 17 | 18 | 19 | def main(): 20 | save_path = "%s/collage.mp4" % args.testdir 21 | 22 | video_list = [] 23 | for sub_seq in sorted(glob.glob("%s/%s*" % (args.testdir, args.prefix))): 24 | path_list = [] 25 | path_list.append("%s/ref/ref_rgb.mp4" % sub_seq) 26 | path_list.append("%s/ref/rgb.mp4" % sub_seq) 27 | path_list.append("%s/ref/xyz.mp4" % sub_seq) 28 | path_list.append("%s/rot-0-360/rgb.mp4" % sub_seq) 29 | path_list.append("%s/rot-0-360/xyz.mp4" % sub_seq) 30 | 31 | # make sure these exist 32 | if np.sum([os.path.exists(path) for path in path_list]) == len(path_list): 33 | print("found %s" % sub_seq) 34 | video_list.append([VideoFileClip(path) for path in path_list]) 35 | 36 | if len(video_list) == 0: 37 | print("no video found") 38 | return 39 | 40 | # align in time 41 | max_duration = max( 42 | [clip.duration for clip in list(itertools.chain.from_iterable(video_list))] 43 | ) 44 | for i, clip_list in enumerate(video_list): 45 | for j, clip in enumerate(clip_list): 46 | video_list[i][j] = clip.resize(width=512).fx( 47 | vfx.freeze, t="end", total_duration=max_duration, padding_end=0.5 
48 | ) 49 | 50 | final_clip = clips_array(video_list) 51 | final_clip.write_videofile(save_path) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /scripts/download_unzip.sh: -------------------------------------------------------------------------------- 1 | # bash scripts/download_unzip.sh "$url" 2 | url=$1 3 | rootdir=$PWD 4 | 5 | filename=tmp-`date +"%Y-%m-%d-%H-%M-%S"`.zip 6 | wget $url -O $filename 7 | unzip $filename 8 | rm $filename 9 | -------------------------------------------------------------------------------- /scripts/install-deps.sh: -------------------------------------------------------------------------------- 1 | mim install mmcv 2 | 3 | (cd lab4d/third_party/quaternion && CUDA_HOME=$CONDA_PREFIX pip install .) 4 | 5 | mkdir ./preprocess/third_party/Track-Anything/checkpoints; wget "https://www.dropbox.com/scl/fi/o86gx6zn27b494m937n2i/E2FGVI-HQ-CVPR22.pth?rlkey=j15ue65ryy8jb1mvn2htf0jtk&st=t4zyl5jk&dl=0" -O ./preprocess/third_party/Track-Anything/checkpoints/E2FGVI-HQ-CVPR22.pth 6 | 7 | wget https://www.dropbox.com/s/bgsodsnnbxdoza3/vcn_rob.pth -O ./preprocess/third_party/vcnplus/vcn_rob.pth 8 | 9 | wget https://www.dropbox.com/s/51cjzo8zgz966t5/human.pth -O preprocess/third_party/viewpoint/human.pth 10 | 11 | wget https://www.dropbox.com/s/1464pg6c9ce8rve/quad.pth -O preprocess/third_party/viewpoint/quad.pth 12 | -------------------------------------------------------------------------------- /scripts/render_intermediate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University. 2 | # python scripts/render_intermediate.py --testdir logdir/human-48-category-comp/ 3 | import sys, os 4 | import pdb 5 | 6 | os.environ["PYOPENGL_PLATFORM"] = "egl" # opengl seems to only work with TPU 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 8 | 9 | import glob 10 | import numpy as np 11 | import cv2 12 | import argparse 13 | import trimesh 14 | import pyrender 15 | from pyrender import IntrinsicsCamera, Mesh, Node, Scene, OffscreenRenderer 16 | import matplotlib 17 | import tqdm 18 | 19 | from lab4d.utils.io import save_vid 20 | 21 | cmap = matplotlib.colormaps.get_cmap("cool") 22 | 23 | parser = argparse.ArgumentParser(description="script to render cameras over epochs") 24 | parser.add_argument("--testdir", default="", help="path to test dir") 25 | parser.add_argument( 26 | "--data_class", default="fg", type=str, help="which data to render, {fg, bg}" 27 | ) 28 | args = parser.parse_args() 29 | 30 | img_size = 1024 31 | 32 | # renderer 33 | r = OffscreenRenderer(img_size, img_size) 34 | cam = IntrinsicsCamera(img_size, img_size, img_size / 2, img_size / 2) 35 | # light 36 | direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=3.0) 37 | light_pose = np.asarray( 38 | [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=float 39 | ) 40 | # cv to gl coords 41 | cam_pose = -np.eye(4) 42 | cam_pose[0, 0] = 1 43 | cam_pose[-1, -1] = 1 44 | rtmat = np.eye(4) 45 | # object to camera transforms 46 | rtmat[:3, :3] = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0] # bev 47 | 48 | 49 | def main(): 50 | # io 51 | path_list = [ 52 | i for i in glob.glob("%s/*-%s-proxy.obj" % (args.testdir, args.data_class)) 53 | ] 54 | if len(path_list) == 0: 55 | print("no mesh found in %s for %s" % (args.testdir, args.data_class)) 56 | return 57 | path_list = 
sorted(path_list, key=lambda x: int(x.split("/")[-1].split("-")[0])) 58 | outdir = "%s/renderings_proxy" % args.testdir 59 | os.makedirs(outdir, exist_ok=True) 60 | 61 | mesh_dict = {} 62 | aabb_min = np.asarray([np.inf, np.inf, np.inf]) 63 | aabb_max = np.asarray([-np.inf, -np.inf, -np.inf]) 64 | for mesh_path in path_list: 65 | batch_idx = int(mesh_path.split("/")[-1].split("-")[0]) 66 | mesh_obj = trimesh.load(mesh_path) 67 | mesh_dict[batch_idx] = mesh_obj 68 | 69 | # update aabb 70 | aabb_min = np.minimum(aabb_min, mesh_obj.bounds[0]) 71 | aabb_max = np.maximum(aabb_max, mesh_obj.bounds[1]) 72 | 73 | # set camera translation 74 | rtmat[2, 3] = max(aabb_max - aabb_min) * 1.2 75 | 76 | # render 77 | frames = [] 78 | for batch_idx, mesh_obj in tqdm.tqdm(mesh_dict.items()): 79 | scene = Scene(ambient_light=0.4 * np.asarray([1.0, 1.0, 1.0, 1.0])) 80 | 81 | # add object / camera 82 | mesh_obj.apply_transform(rtmat) 83 | scene.add_node(Node(mesh=Mesh.from_trimesh(mesh_obj))) 84 | 85 | # camera 86 | scene.add(cam, pose=cam_pose) 87 | 88 | # light 89 | scene.add(direc_l, pose=light_pose) 90 | 91 | # render 92 | color, depth = r.render( 93 | scene, 94 | flags=pyrender.RenderFlags.SHADOWS_DIRECTIONAL 95 | | pyrender.RenderFlags.SKIP_CULL_FACES, 96 | ) 97 | # add text 98 | color = color.astype(np.uint8) 99 | color = cv2.putText( 100 | color, 101 | "batch: %02d" % batch_idx, 102 | (30, 50), 103 | cv2.FONT_HERSHEY_SIMPLEX, 104 | 2, 105 | (256, 0, 0), 106 | 2, 107 | ) 108 | frames.append(color) 109 | 110 | save_vid("%s/fg" % outdir, frames, suffix=".mp4", upsample_frame=-1) 111 | print("saved to %s/fg.mp4" % outdir) 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /scripts/run_crop_all.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # TODO: use config file to go over seqs 3 | # python scripts/run_crop_all.py cat-pikachu 4 | import os 5 | import sys 6 | import glob 7 | import multiprocessing 8 | from functools import partial 9 | 10 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 11 | from preprocess.scripts.crop import extract_crop 12 | 13 | os.environ["OMP_NUM_THREADS"] = "1" 14 | 15 | vidname = sys.argv[1] 16 | path = ( 17 | "database/processed/JPEGImages/Full-Resolution/%s*" % vidname 18 | ) # path to the images 19 | 20 | 21 | def process_seqname(seqname, size, region): 22 | extract_crop(seqname, size, region) 23 | 24 | 25 | if __name__ == "__main__": 26 | pool = multiprocessing.Pool(processes=32) # use up to 32 processes 27 | 28 | for seqname in sorted(glob.glob(path)): 29 | seqname = seqname.split("/")[-1] 30 | # we'll use a partial function to bind the common arguments 31 | func = partial(process_seqname, seqname, 256) 32 | pool.apply_async(func, args=(0,)) 33 | pool.apply_async(func, args=(1,)) 34 | 35 | pool.close() 36 | pool.join() # wait for all processes to finish 37 | -------------------------------------------------------------------------------- /scripts/run_rendering_parallel.py: -------------------------------------------------------------------------------- 1 | # WIP by Gengshan Yang 2 | # generate three visualizations (reference view, bev, turntable) rendering, mesh export in parallel 3 | # python scripts/run_rendering_parallel.py logdir/dog-98-category-comp/opts.log 0-2 0,1,2 4 | import sys 5 | import subprocess 6 | 7 | # Set the flagfile. 
8 | flagfile = sys.argv[1] 9 | 10 | # Set the range of inst_ids. 11 | start_inst_id, end_inst_id = map(int, sys.argv[2].split("-")) 12 | id_list = list(range(start_inst_id, end_inst_id + 1)) 13 | 14 | # Set the devices id 15 | dev_list = sys.argv[3].split(",") 16 | dev_list = list(map(int, dev_list)) 17 | num_devices = len(dev_list) 18 | id_per_device = len(id_list) // num_devices 19 | 20 | print( 21 | "rendering videos", 22 | id_list, 23 | "on devices", 24 | dev_list, 25 | ) 26 | 27 | # render proxy over rounds 28 | logdir = flagfile.rsplit("/", 1)[0] 29 | subprocess.Popen( 30 | f"python scripts/render_intermediate.py --testdir {logdir}/", shell=True 31 | ) 32 | 33 | # Loop over each device. 34 | for dev_id, device in enumerate(dev_list): 35 | # Initialize an empty command list for this device. 36 | command_for_device = [] 37 | 38 | # Loop over the inst_ids assigned to this device. 39 | if dev_id == num_devices - 1: 40 | assigned_ids = id_list[dev_id * id_per_device :] 41 | else: 42 | assigned_ids = id_list[dev_id * id_per_device : (dev_id + 1) * id_per_device] 43 | for inst_id in assigned_ids: 44 | # Add the command for this inst_id to the device's command list. 45 | command_for_device.append( 46 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --data_prefix full" 47 | ) 48 | command_for_device.append( 49 | f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint rot-0-360" 50 | ) 51 | # command_for_device.append( 52 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/render.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id} --render_res 256 --viewpoint bev-90" 53 | # ) 54 | # command_for_device.append( 55 | # f"CUDA_VISIBLE_DEVICES={device} python lab4d/export.py --flagfile={flagfile} --load_suffix latest --inst_id {inst_id}" 56 | # ) 57 | 58 | # Add a delay between commands to avoid overloading the device. 59 | command_for_device.append("sleep 1") 60 | 61 | # Join all commands for this device into a single string. 62 | command_str = "; ".join(command_for_device) 63 | 64 | # Start a screen session for this device, executing the device's command string. 
65 | subprocess.Popen( 66 | f'screen -S render-{device}-{",".join(str(i) for i in assigned_ids)} -d -m bash -c "{command_str}"', 67 | shell=True, 68 | ) 69 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | # bash scripts/train.sh lab4d/train.py 0 --seqname 2023-03-26-00-39-17-cat-pikachu 2 | main_func=$1 3 | dev=$2 4 | add_args=${*: 3:$#-1} 5 | 6 | ngpu=`echo $dev | awk -F '[\t,]' '{print NF-1}'` 7 | ngpu=$(($ngpu + 1 )) 8 | echo "using "$ngpu "gpus" 9 | 10 | # assign random port 11 | # https://github.com/pytorch/pytorch/issues/73320 12 | CUDA_VISIBLE_DEVICES=$dev torchrun \ 13 | --nproc_per_node $ngpu --nnodes 1 --rdzv_backend c10d --rdzv_endpoint localhost:0 \ 14 | $main_func \ 15 | --ngpu $ngpu \ 16 | $add_args 17 | -------------------------------------------------------------------------------- /scripts/zip_dataset.py: -------------------------------------------------------------------------------- 1 | # Description: Zip the dataset for easy sharing 2 | # Usage: python scripts/zip_dataset.py 3 | import configparser 4 | import os 5 | import sys 6 | 7 | cwd = os.getcwd() 8 | if cwd not in sys.path: 9 | sys.path.insert(0, cwd) 10 | 11 | from preprocess.libs.io import run_bash_command 12 | 13 | vidname = sys.argv[1] 14 | 15 | args = [] 16 | config = configparser.RawConfigParser() 17 | config.read("database/configs/%s.config" % vidname) 18 | for vidid in range(len(config.sections()) - 1): 19 | seqname = config.get("data_%d" % vidid, "img_path").strip("/").split("/")[-1] 20 | run_bash_command( 21 | f"zip {vidname}.zip -r database/processed/*/Full-Resolution/{seqname}" 22 | ) 23 | 24 | run_bash_command(f"zip {vidname}.zip database/configs/{vidname}.config") 25 | -------------------------------------------------------------------------------- /scripts/zip_logdir.py: -------------------------------------------------------------------------------- 1 | # Description: Zip the logdir for easy sharing 2 | # Usage: python scripts/zip_logdir 3 | import os 4 | import pdb 5 | import sys 6 | 7 | cwd = os.getcwd() 8 | if cwd not in sys.path: 9 | sys.path.insert(0, cwd) 10 | 11 | from preprocess.libs.io import run_bash_command 12 | 13 | logpath = sys.argv[1] 14 | 15 | logname = logpath.strip("/").split("/")[-1] 16 | print(logname) 17 | 18 | run_bash_command(f"zip log-{logname}.zip {logpath}/*") 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="lab4d", 5 | packages=find_packages(), 6 | ) 7 | --------------------------------------------------------------------------------
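As a closing aside, scripts/zip_dataset.py above loops over len(config.sections()) - 1 entries, which implies one [data_i] section per video plus one extra shared section in database/configs/<vidname>.config. The sketch below is hypothetical and only illustrates the fields that script actually reads; the name of the shared section and the example sequence name are assumptions, not taken from the repository.

import configparser

# Build an in-memory stand-in for database/configs/<vidname>.config
config = configparser.RawConfigParser()
config["data"] = {}  # assumed shared section; zip_dataset.py's "- 1" skips exactly one such entry
config["data_0"] = {
    "img_path": "database/processed/JPEGImages/Full-Resolution/cat-pikachu-0000/"
}

# Mirror the seqname extraction performed by scripts/zip_dataset.py
seqname = config.get("data_0", "img_path").strip("/").split("/")[-1]
print(seqname)  # -> cat-pikachu-0000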