├── .gitmodules ├── LICENSE.md ├── README.md ├── data └── .gitkeep ├── data_preprocess ├── rgbd_dog │ ├── detect_dog_mask2former.py │ └── preprocess.py ├── robot │ └── preprocess.py └── zju │ ├── detect_person.py │ ├── diff.patch │ ├── preprocess.py │ └── read_smpl.py ├── environment.yml ├── figures ├── reconstruction_rotate_motion_0_0.gif ├── repose_rotate_0_0.gif └── robot_example.jpg ├── install.sh └── src ├── confs ├── atlas.yml ├── atlas_merge.yml ├── baxter.yml ├── baxter_merge.yml ├── cassie.yml ├── cassie_merge.yml ├── default.yml ├── dog.yml ├── dog_merge.yml ├── iiwa.yml ├── iiwa_merge.yml ├── nao.yml ├── nao_merge.yml ├── pandas.yml ├── pandas_merge.yml ├── spot.yml ├── spot_merge.yml ├── zju366.yml ├── zju366_merge.yml ├── zju377.yml ├── zju377_merge.yml ├── zju381.yml ├── zju381_merge.yml ├── zju384.yml ├── zju384_merge.yml ├── zju387.yml └── zju387_merge.yml ├── datasets └── dataset.py ├── demo_notebook.ipynb ├── models ├── decoder.py ├── loss.py └── model.py ├── train_single_video.py ├── utils ├── config.py ├── get_args.py ├── graph_utils.py ├── model_utils.py ├── render_utils.py ├── sdf_utils.py ├── train_utils.py ├── trainer.py └── visualization_utils.py ├── validation ├── SMPL_regression.py ├── lpips_ssim.py └── reconstruction.py └── visualize ├── create_reconstruction_video.py ├── create_repose_video.py ├── part_merging.py ├── repose_configs ├── cassie.yml ├── iiwa.yml └── spot.yml └── repose_person_by_driving_pose.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "data_preprocess/rgbd_dog/RGBD-Dog"] 2 | path = data_preprocess/rgbd_dog/RGBD-Dog 3 | url = git@github.com:CAMERA-Bath/RGBD-Dog.git 4 | [submodule "data_preprocess/rgbd_dog/RGBD_Dog"] 5 | path = data_preprocess/rgbd_dog/RGBD_Dog 6 | url = git@github.com:CAMERA-Bath/RGBD-Dog.git 7 | [submodule "data_preprocess/zju/AdeliDet"] 8 | path = data_preprocess/zju/AdeliDet 9 | url = git@github.com:aim-uofa/AdelaiDet.git 10 | [submodule "data_preprocess/rgbd_dog/Mask2Former"] 11 | path = data_preprocess/rgbd_dog/Mask2Former 12 | url = git@github.com:facebookresearch/Mask2Former.git 13 | [submodule "data_preprocess/zju/EasyMocap"] 14 | path = data_preprocess/zju/EasyMocap 15 | url = git@github.com:zju3dv/EasyMocap.git 16 | [submodule "AdelaiDet"] 17 | path = AdelaiDet 18 | url = https://github.com/aim-uofa/AdelaiDet.git 19 | [submodule "Mask2Former"] 20 | path = Mask2Former 21 | url = git@github.com:facebookresearch/Mask2Former.git 22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## NVIDIA License 2 | 3 | ### 1. Definitions 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | “Work” means (a) the original work of authorship made available under this license, which may include software, documentation, or other files, and (b) any additions to or derivative works thereof that are made available under this license. 7 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this license, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 
8 | Works are “made available” under this license by including in or with the Work either (a) a copyright notice referencing the applicability of this license to the Work, or (b) a copy of this license. 9 | 10 | ### 2. License Grant 11 | 12 | 2.1 Copyright Grant. Subject to the terms and conditions of this license, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 13 | 14 | ### 3. Limitations 15 | 16 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this license, (b) you include a complete copy of this license with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 17 | 18 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this license (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 19 | 20 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA Corporation and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 21 | 22 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this license from such Licensor (including the grant in Section 2.1) will terminate immediately. 23 | 24 | 3.5 Trademarks. This license does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this license. 25 | 26 | 3.6 Termination. If you violate any term of this license, then your rights under this license (including the grant in Section 2.1) will terminate immediately. 27 | 28 | ### 4. Disclaimer of Warranty. 29 | 30 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF 31 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 32 | 33 | ### 5. Limitation of Liability. 34 | 35 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
36 | 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Watch It Move 2 | 3 | Official implementation of the IEEE/CVF CVPR 2022 paper 4 | 5 | **Watch It Move: Unsupervised Discovery of 3D Joints for Re-Posing of Articulated Objects**\ 6 | Atsuhiro Noguchi, Umar Iqbal, Jonathan Tremblay, Tatsuya Harada, Orazio Gallo\ 7 | [Project page](https://nvlabs.github.io/watch-it-move/) / [Paper](https://arxiv.org/abs/2112.11347) 8 | / [Video](https://www.youtube.com/watch?v=oRnnuCVV89o) 9 | 10 | Abstract: Rendering articulated objects while controlling their poses is critical to applications such as virtual 11 | reality or animation for movies. Manipulating the pose of an object, however, requires the understanding of its 12 | underlying structure, that is, its joints and how they interact with each other. Unfortunately, assuming the structure 13 | to be known, as existing methods do, precludes the ability to work on new object categories. We propose to learn both 14 | the appearance and the structure of previously unseen articulated objects by observing them move from multiple views, 15 | with no joints annotation supervision, or information about the structure. We observe that 3D points that are static 16 | relative to one another should belong to the same part, and that adjacent parts that move relative to each other must be 17 | connected by a joint. To leverage this insight, we model the object parts in 3D as ellipsoids, which allows us to 18 | identify joints. We combine this explicit representation with an implicit one that compensates for the approximation 19 | introduced. We show that our method works for different structures, from quadrupeds, to single-arm robots, to humans. 20 | 21 | ## Table of content 22 | * [Setup](#setup) 23 | * [Steps to replicate the teaser video for spot](#steps-to-replicate-the-teaser-video-for-spot) 24 | * [Steps to train for spot](#steps-to-train-for-spot) 25 | * [The WIM dataset](#the-wim-dataset) 26 | * [Dataset Preprocessing](#dataset-preprocessing) 27 | * [Training](#training) 28 | * [Pretrained Models](#pretrained-models) 29 | * [Demo](#demo) 30 | * [Evaluation (ZJU only)](#evaluation--zju-only-) 31 | * [Visualization](#visualization) 32 | * [Citation](#citation) 33 | 34 | ## Setup 35 | Clone this repository and create the environment. 36 | ```angular2html 37 | git clone --recursive git@github.com:NVlabs/watch-it-move.git 38 | cd watch-it-move 39 | bash install.sh 40 | 41 | # To run the training and rendering examples below, download the data for Spot 42 | mkdir -p data/robots/spot 43 | gdown https://drive.google.com/u/1/uc\?id\=1HNzCa8olJgedpKe6jBCIi-_LffLX9f8R\&export\=download -O data/robots/spot/cache.pickle 44 | ``` 45 | ### Disclaimer 46 | We have only tested the following code on NVIDIA A100 GPUs. 
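### Checking the downloaded data (optional)
The snippet below is a minimal sketch for sanity-checking the `cache.pickle` downloaded above. It assumes the file follows the cache format written by `data_preprocess/robot/preprocess.py` (`img` and `mask` hold blosc-packed arrays, while the camera parameters are plain numpy arrays); it is not part of the training pipeline itself.
```python
# Minimal sketch: peek inside a downloaded cache.pickle.
# Assumes the format produced by data_preprocess/robot/preprocess.py.
import pickle

import blosc

with open("data/robots/spot/cache.pickle", "rb") as f:
    cache = pickle.load(f)

print(sorted(cache.keys()))  # camera_*, frame_id, img, mask, ...
img = blosc.unpack_array(cache["img"][0])    # first frame, (3, H, W) uint8
mask = blosc.unpack_array(cache["mask"][0])  # first mask, (H, W) bool
print(img.shape, mask.shape)
print(cache["camera_intrinsic"][0])          # 3x3 intrinsics of the first sample
```
If the shapes print without errors, the data is in place for the steps below.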
47 | 48 | ## Steps to replicate the teaser video for spot 49 | 50 | ```angular2html 51 | # download pretrained model for spot 52 | mkdir -p data/output/result/spot_merge 53 | gdown https://drive.google.com/u/1/uc\?id\=12_K-x-daAGqvIoDd3tRvRIe0LKLOALyC\&export\=download -O data/output/result/spot_merge/snapshot_latest.pth 54 | 55 | cd /src 56 | # save the reconstruction video 57 | python visualize/create_reconstruction_video.py --exp_name spot_merge 58 | # save the re-pose video 59 | python visualize/create_repose_video.py --exp_name spot_merge --repose_config visualize/repose_configs/spot.yml --rotate 60 | ``` 61 | Videos in mp4 format and a PNG image for each frame will be saved to `/data/output/result/spot_merge/`
64 | 65 |
67 | ## Steps to train for spot 68 | ```angular2html 69 | cd /src 70 | CUDA_VISIBLE_DEVICES=[gpu_id] python train_single_video.py --config confs/spot.yml --default_config confs/default.yml 71 | CUDA_VISIBLE_DEVICES=[gpu_id] python train_single_video.py --config confs/spot_merge.yml --default_config confs/default.yml 72 | ``` 73 | ## The WIM dataset 74 | 75 | 76 | We provide multiview videos for seven different moving robots [here](https://drive.google.com/drive/folders/1i5rWanA8FgVLrWPO4bl0aaGKBYwhY6IQ) (see [LICENSE.md](LICENSE.md) for terms of use). 77 | We provide both raw video data and preprocessed data. Please follow the instructions below to download and preprocess the data. 78 | It includes 1000-frame videos of each moving robot captured from 20 different viewpoints, as well as preprocessed data covering 300 frames from 5 chosen viewpoints. 79 | 80 | 81 | ## Dataset Preprocessing 82 | ### WIM Dataset 83 | - The WIM dataset is available [here](https://drive.google.com/drive/folders/1i5rWanA8FgVLrWPO4bl0aaGKBYwhY6IQ). 84 | - We provide preprocessed data in the directory named [preprocessed](https://drive.google.com/drive/folders/1toiwb06VggqH1FOS9OnKYlqRFk3H6T9g). Download, uncompress, and place it in 85 | ```angular2html 86 | data/robots/<robot_name>/cache.pickle 87 | ``` 88 | - If you want to run the pre-processing on your own, download the tar.gz files from [here](https://drive.google.com/drive/folders/1i5rWanA8FgVLrWPO4bl0aaGKBYwhY6IQ), uncompress them, place them as 89 | ``` 90 | data/robots/<robot_name>/cam_<camera_id>.json 91 | data/robots/<robot_name>/frame_<frame_id>_cam_<camera_id>.png 92 | ``` 93 | and run 94 | ```angular2html 95 | cd /data_preprocess/robot 96 | python preprocess.py --data_root ../../data/robots --robot_name atlas --robot_name baxter --robot_name spot --robot_name cassie --robot_name iiwa --robot_name nao --robot_name pandas 97 | ``` 98 | 99 | ### ZJU MOCAP 100 | 101 | - Requirements (installed by `install.sh`): [Adet](https://github.com/aim-uofa/AdelaiDet), [EasyMocap](https://github.com/zju3dv/EasyMocap) 102 | 103 | - Download the COCO instance segmentation model 104 | named [R_101_dcni3_5x](https://github.com/aim-uofa/AdelaiDet#coco-instance-segmentation-baselines-with-blendmask) from 105 | Adet and copy it to `data_preprocess/zju/R_101_dcni3_5x.pth`. 106 | - Download the [ZJU MOCAP LightStage dataset](https://github.com/zju3dv/EasyMocap#zju-mocap) and copy it in 107 | ``` 108 | /data/ 109 | └── zju_mocap 110 | ├── 366 111 | ├── 377 112 | ├── 381 113 | ├── 384 114 | └── 387 115 | ``` 116 | - Download the SMPL models 117 | following [EasyMocap installation](https://github.com/zju3dv/EasyMocap/blob/master/doc/installation.md). You only need to download the smplx models.
118 | ``` 119 | /data 120 | └── smplx 121 | ├── J_regressor_body25.npy 122 | ├── J_regressor_body25_smplh.txt 123 | ├── J_regressor_body25_smplx.txt 124 | ├── J_regressor_mano_LEFT.txt 125 | ├── J_regressor_mano_RIGHT.txt 126 | └── smplx 127 | ├── SMPLX_FEMALE.pkl 128 | ├── SMPLX_MALE.pkl 129 | └── SMPLX_NEUTRAL.pkl 130 | ``` 131 | - Run 132 | ```angular2html 133 | cd /data_preprocess/zju 134 | python preprocess.py --smpl_model_path ../../data/smplx --zju_path ../../data/zju_mocap --person_id 366 --person_id 377 --person_id 381 --person_id 384 --person_id 387 135 | ``` 136 | 137 | ### Dog dataset 138 | - Requirement (installed by `install.sh`): [mask2former](https://github.com/facebookresearch/Mask2Former) 139 | - Download the `Mask2Former (200 queries)` model from https://github.com/facebookresearch/Mask2Former/blob/main/MODEL_ZOO.md#instance-segmentation and copy it to `/data_preprocess/rgbd_dog/model_final_e5f453.pkl`. 140 | - Download the [RGBD-Dog dataset](https://github.com/CAMERA-Bath/RGBD-Dog) as 141 | ``` 142 | /data/rgbd_dog 143 | └── dog1 144 | └── motion_testSeq 145 | ├── kinect_depth 146 | ├── kinect_rgb 147 | ├── motion_capture 148 | └── sony 149 | ``` 150 | We used `motion_testSeq` for training. 151 | - Run 152 | ```angular2html 153 | cd /data_preprocess/rgbd_dog 154 | python preprocess.py --data_root ../../data/rgbd_dog/dog1/motion_testSeq 155 | ``` 156 | 157 | ## Training 158 | Run the following commands to train the model. Please specify the experiment name in `[exp_name]`. 159 | ``` 160 | cd /src 161 | CUDA_VISIBLE_DEVICES=[gpu_id] python train_single_video.py --config confs/[exp_name].yml --default_config confs/default.yml 162 | ``` 163 | 164 | ## Pretrained Models 165 | Pretrained models for the ZJU mocap, robot, and dog datasets are available [here](https://drive.google.com/drive/folders/1gmkkHXRr5-1w5W-kCSHcsInMY8ODEqyK). 166 | The name of each directory corresponds to the name of a config file under `src/confs`. 167 | Please download and place these directories in `data/output/result`. 168 | ```angular2html 169 | /data/output/result 170 | ├── atlas 171 | │ └── snapshot_latest.pth 172 | ├── baxter 173 | ... 174 | ``` 175 | 176 | ## Demo 177 | Visualization code is available in `/src/demo_notebook.ipynb`. 178 | 179 | ## Evaluation (ZJU only) 180 | 181 | ### LPIPS and SSIM 182 | Calculate LPIPS and SSIM between generated and ground-truth images. 183 | ```angular2html 184 | cd /src 185 | python validation/reconstruction.py --exp_name zju366 --exp_name zju377 186 | python validation/lpips_ssim.py --exp_name zju366 --exp_name zju377 187 | ``` 188 | Results will be saved to `[output_dir]/result/[exp_name]/validation` 189 | 190 | ### Pose Regression 191 | Calculate MPJPE (mm) between ground-truth and regressed joint locations. 192 | ```angular2html 193 | cd /src 194 | python validation/SMPL_regression.py --exp_name zju366 --exp_name zju377 195 | ``` 196 | 197 | ## Visualization 198 | ### Reconstruction video 199 | Results will be saved to `/data/output/result/[exp_name]/reconstruction_...`. 200 | ```angular2html 201 | cd /src 202 | python visualize/create_reconstruction_video.py --exp_name zju366 --exp_name zju377 203 | ``` 204 | 205 | ### Manual re-posing 206 | Results will be saved to `/data/output/result/[exp_name]/repose_...`.
207 | ```angular2html 208 | cd /src 209 | python visualize/create_repose_video.py --exp_name spot --repose_config visualize/repose_configs/spot.yml --rotate 210 | ``` 211 | repose_config (e.g., `/src/visualize/repose_configs/spot.yml`) includes the following parameters: 212 | ```angular2html 213 | camera_id: camera id of the reference frame 214 | frame_id: frame id of the reference frame 215 | root: part id of the root. 216 | first: part id and its rotation in rodrigues form for the first quarter of the video. 217 | second: part id and its rotation in rodrigues form for the next quarter of the video. 218 | ``` 219 | `root`, `first`, and `second` vary depending on the training results, even when trained on the same data. 220 | For your pretrained models, please follow `/src/demo_notebook.ipynb` to adapt them. 221 | 222 | ### Merge Parts 223 | Images of the merged structure will be saved to `/data/output/result/[exp_name]/merge`. 224 | ```angular2html 225 | cd /src 226 | python visualize/part_merging.py --exp_name spot --camera_id 0 227 | ``` 228 | 229 | ### Re-posing by driving frames (ZJU only) 230 | Re-pose the reconstructed person using poses from test frames. 231 | ```angular2html 232 | cd /src 233 | python visualize/repose_person_by_driving_pose.py --exp_name zju366 --camera_id 0 --num_video_frames 50 234 | ``` 235 | Results will be saved to `/data/output/result/[exp_name]/drive_...`. 236 | 237 | # Citation 238 | ```bibtex 239 | @inproceedings{noguchi2022watch, 240 | title = {Watch It Move: {U}nsupervised Discovery of {3D} Joints for Re-Posing of Articulated Objects}, 241 | author = {Atsuhiro Noguchi and Umar Iqbal and Jonathan Tremblay and Tatsuya Harada and Orazio Gallo}, 242 | booktitle = {CVPR}, 243 | year = {2022}, 244 | } 245 | ``` 246 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/watch-it-move/5fdec2b71b07f9d4a5492fb3dad6bfcc7d9a9f8b/data/.gitkeep -------------------------------------------------------------------------------- /data_preprocess/rgbd_dog/detect_dog_mask2former.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited.
10 | """ 11 | 12 | import sys 13 | from typing import Any, List 14 | 15 | import numpy as np 16 | import torch 17 | from detectron2.config import get_cfg 18 | from detectron2.engine.defaults import DefaultPredictor 19 | from detectron2.projects.deeplab import add_deeplab_config 20 | from tqdm import tqdm 21 | 22 | sys.path.append("Mask2Former") 23 | from mask2former import add_maskformer2_config 24 | 25 | 26 | def setup_cfg(): 27 | # load config from file and command-line arguments 28 | cfg = get_cfg() 29 | add_deeplab_config(cfg) 30 | add_maskformer2_config(cfg) 31 | cfg.merge_from_file( 32 | "Mask2Former/configs/coco/instance-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_100ep.yaml") 33 | cfg.merge_from_list(["MODEL.WEIGHTS", "model_final_e5f453.pkl"]) 34 | cfg.freeze() 35 | 36 | return cfg 37 | 38 | 39 | class DogDetector(object): 40 | def __init__(self): 41 | cfg = setup_cfg() 42 | self.cpu_device = torch.device("cpu") 43 | self.vis_text = cfg.MODEL.ROI_HEADS.NAME == "TextHead" 44 | 45 | self.predictor = DefaultPredictor(cfg) 46 | 47 | def run_on_video(self, video: List[np.ndarray]) -> List[Any]: 48 | """ 49 | Visualizes predictions on frames of the input video. 50 | Args: 51 | video (np.array): 52 | Returns: 53 | ndarray: RGB 54 | """ 55 | 56 | def process_predictions(predictions: Any): 57 | predictions = predictions["instances"].to(self.cpu_device) 58 | return predictions 59 | 60 | detected_video = [] 61 | for frame in tqdm(video): 62 | detected_video.append(process_predictions(self.predictor(frame))) 63 | 64 | return detected_video 65 | -------------------------------------------------------------------------------- /data_preprocess/robot/preprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | import argparse 12 | import json 13 | import pickle 14 | from typing import List 15 | 16 | import blosc 17 | import cv2 18 | import numpy as np 19 | from tqdm import tqdm 20 | 21 | 22 | def read_frames(chosen_camera_id: List[int], video_len: int, data_dir: str): 23 | """ 24 | 25 | Args: 26 | chosen_camera_id: 27 | video_len: 28 | data_dir: 29 | 30 | Returns: 31 | 32 | """ 33 | all_video = [] 34 | all_mask = [] 35 | all_camera_intrinsic = [] 36 | all_camera_rotation = [] 37 | all_camera_translation = [] 38 | for c_id in tqdm(chosen_camera_id): 39 | for f_id in range(video_len): 40 | img_path = f"{data_dir}/frame_{f_id:0>5}_cam_{c_id:0>3}.png" 41 | config_path = f"{data_dir}/cam_{c_id:0>3}.json" 42 | img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED) 43 | img_scale = 1 44 | if img.shape[0] != 512: 45 | img_scale = 512 / img.shape[0] 46 | img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_AREA) 47 | with open(config_path, "r") as f: 48 | config = json.load(f) 49 | frame = img[:, :, [2, 1, 0]] 50 | mask = img[:, :, 3] > 127.5 51 | all_video.append(frame) 52 | all_mask.append(mask) 53 | 54 | intrinsic = config["camera_data"]["intrinsics"] 55 | camera_intrinsic = np.zeros((3, 3), dtype="float32") 56 | camera_intrinsic[0, 0] = intrinsic['fx'] * img_scale 57 | camera_intrinsic[1, 1] = intrinsic['fy'] * img_scale 58 | camera_intrinsic[0, 2] = intrinsic['cx'] * img_scale 59 | camera_intrinsic[1, 2] = intrinsic['cy'] * img_scale 60 | camera_intrinsic[2, 2] = 1 61 | all_camera_intrinsic.append(camera_intrinsic) 62 | 63 | extrinsic = np.array(config["camera_data"]["camera_view_matrix"]) 64 | extrinsic[:, 1] = -extrinsic[:, 1] 65 | extrinsic[:, 2] = -extrinsic[:, 2] 66 | all_camera_rotation.append(extrinsic[:3, :3].transpose()) # (3, 3) 67 | all_camera_translation.append(extrinsic[3, :3, None]) # (3, 1) 68 | 69 | all_video = np.array(all_video) 70 | all_mask = np.array(all_mask) 71 | all_camera_intrinsic = np.array(all_camera_intrinsic) 72 | all_camera_rotation = np.array(all_camera_rotation) 73 | all_camera_translation = np.array(all_camera_translation) 74 | 75 | frame_id = [np.arange(video_len) for cam in chosen_camera_id] 76 | frame_id = np.concatenate(frame_id, axis=0) 77 | 78 | camera_id = [np.ones(video_len, dtype="int") * (cam - 1) for cam in chosen_camera_id] 79 | camera_id = np.concatenate(camera_id, axis=0) 80 | 81 | return all_video, all_mask, frame_id, camera_id, all_camera_intrinsic, all_camera_rotation, all_camera_translation 82 | 83 | 84 | def preprocess_robot(robot_name: str): 85 | if robot_name == "atlas": 86 | data_dir = f"{DATA_ROOT}/atlas" 87 | chosen_camera_id = [0, 2, 5, 13, 18] 88 | video_len = 300 89 | elif robot_name == "baxter": 90 | data_dir = f"{DATA_ROOT}/baxter" 91 | chosen_camera_id = [0, 2, 5, 13, 18] 92 | video_len = 300 93 | elif robot_name == "spot": 94 | data_dir = f"{DATA_ROOT}/spot" 95 | chosen_camera_id = [0, 2, 5, 10, 13] 96 | video_len = 300 97 | elif robot_name == "cassie": 98 | data_dir = f"{DATA_ROOT}/cassie" 99 | chosen_camera_id = [0, 3, 5, 10, 13] 100 | video_len = 300 101 | elif robot_name == "iiwa": 102 | data_dir = f"{DATA_ROOT}/iiwa" 103 | chosen_camera_id = [0, 2, 4, 5, 13] 104 | video_len = 300 105 | elif robot_name == "nao": 106 | data_dir = f"{DATA_ROOT}/nao" 107 | chosen_camera_id = [7, 10, 11, 14, 15] 108 | video_len = 300 109 | elif robot_name == "pandas": 110 | data_dir = f"{DATA_ROOT}/pandas" 111 | chosen_camera_id = [0, 2, 5, 10, 13] 112 | video_len = 300 113 | else: 114 | raise ValueError("invalid robot name") 115 | 116 | 
data_dict = {} 117 | 118 | # read frame 119 | all_video, all_mask, frame_id, camera_id, all_intrinsic, all_rot, all_trans = read_frames(chosen_camera_id, 120 | video_len, data_dir) 121 | 122 | data_dict["frame_id"] = frame_id 123 | data_dict["img"] = np.array([blosc.pack_array(frame.transpose(2, 0, 1)) for frame in tqdm(all_video)], 124 | dtype="object") 125 | data_dict["mask"] = np.array([blosc.pack_array(mask) for mask in tqdm(all_mask)], dtype="object") 126 | data_dict["camera_intrinsic"] = all_intrinsic 127 | data_dict["camera_rotation"] = all_rot 128 | data_dict["camera_translation"] = all_trans 129 | 130 | data_dict["camera_id"] = np.arange(len(all_video)) // (len(all_video) // len(chosen_camera_id)) 131 | 132 | with open(data_dir + '/cache.pickle', 'wb') as f: 133 | pickle.dump(data_dict, f) 134 | 135 | 136 | if __name__ == "__main__": 137 | parser = argparse.ArgumentParser(description='Robot data preprocessing') 138 | parser.add_argument('--data_root', type=str, required=True) 139 | parser.add_argument('--robot_name', action='append', 140 | required=True) 141 | args = parser.parse_args() 142 | 143 | DATA_ROOT = args.data_root 144 | robot_names = args.robot_name 145 | 146 | for robot_name in robot_names: 147 | preprocess_robot(robot_name) 148 | -------------------------------------------------------------------------------- /data_preprocess/zju/detect_person.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | from typing import Any, List 12 | 13 | import numpy as np 14 | import torch 15 | from adet.config import get_cfg 16 | from detectron2.data import MetadataCatalog 17 | from detectron2.engine.defaults import DefaultPredictor 18 | from tqdm import tqdm 19 | 20 | 21 | def setup_cfg(): 22 | # load config from file and command-line arguments 23 | confidence_threshold = 0.3 24 | cfg = get_cfg() 25 | cfg.merge_from_file("AdeliDet/configs/BlendMask/R_101_dcni3_5x.yaml") 26 | cfg.merge_from_list(["MODEL.WEIGHTS", "R_101_dcni3_5x.pth"]) 27 | # Set score_threshold for builtin models 28 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = confidence_threshold 29 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold 30 | cfg.MODEL.FCOS.INFERENCE_TH_TEST = confidence_threshold 31 | cfg.MODEL.MEInst.INFERENCE_TH_TEST = confidence_threshold 32 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = confidence_threshold 33 | cfg.freeze() 34 | 35 | return cfg 36 | 37 | 38 | class PersonDetector(object): 39 | def __init__(self): 40 | cfg = setup_cfg() 41 | self.metadata = MetadataCatalog.get( 42 | cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" 43 | ) 44 | self.cpu_device = torch.device("cpu") 45 | self.vis_text = cfg.MODEL.ROI_HEADS.NAME == "TextHead" 46 | 47 | self.predictor = DefaultPredictor(cfg) 48 | 49 | def process_predictions(self, frame: np.ndarray, predictions: Any) -> np.ndarray: 50 | """ 51 | 52 | Args: 53 | frame: 54 | predictions: 55 | 56 | Returns: 57 | 58 | """ 59 | predictions = predictions["instances"].to(self.cpu_device) 60 | if predictions.pred_masks.shape[0] == 0: 61 | print("No mask detected") 62 | return np.zeros((frame.shape[0], frame.shape[1], 1)) 63 | 64 | mask = predictions.pred_masks[0, :, :, None].cpu().numpy() 65 | 66 | return mask 67 | 68 | def run_on_video(self, video: np.ndarray) -> List[np.ndarray]: 69 | """ 70 | Detect person from video 71 | Args: 72 | video: 73 | 74 | Returns: 75 | 76 | """ 77 | detected_video = [] 78 | for frame in tqdm(video): 79 | detected_video.append(self.process_predictions(frame, self.predictor(frame))) 80 | 81 | return detected_video 82 | -------------------------------------------------------------------------------- /data_preprocess/zju/diff.patch: -------------------------------------------------------------------------------- 1 | diff --git a/easymocap/smplmodel/lbs.py b/easymocap/smplmodel/lbs.py 2 | index 4c82dd2..6cde76a 100644 3 | --- a/easymocap/smplmodel/lbs.py 4 | +++ b/easymocap/smplmodel/lbs.py 5 | @@ -215,0 +215,5 @@ def 6 | + # Calculate for only the parts required for WatchItMove 7 | + num_rots = rot_mats.shape[1] # Modification for WatchItMove 8 | + rot_mats = rot_mats[:, :num_rots] # Modification for WatchItMove 9 | + J = J[:, :num_rots] # Modification for WatchItMove 10 | + parents = parents[:num_rots] # Modification for WatchItMove 11 | @@ -216,0 +221,3 @@ def 12 | + # return joint locations and transformation matrices for WatchItMove 13 | + if lbs_weights is None: # Modification for WatchItMove 14 | + return J_transformed, A # Modification for WatchItMove 15 | -------------------------------------------------------------------------------- /data_preprocess/zju/preprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | import argparse 12 | import glob 13 | import json 14 | import os 15 | import pickle 16 | from typing import Tuple, List, Dict 17 | 18 | import blosc 19 | import cv2 20 | import numpy as np 21 | import torch 22 | from easymocap.smplmodel import SMPLlayer 23 | from tqdm import tqdm 24 | 25 | from detect_person import PersonDetector 26 | from read_smpl import PoseLoader 27 | 28 | 29 | def read_frames(person_id: int, save_size: int, crop_size: int, chosen_camera_id: np.ndarray 30 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, int]: 31 | """ 32 | 33 | Args: 34 | person_id: 35 | save_size: 36 | crop_size: 37 | chosen_camera_id: 38 | 39 | Returns: 40 | 41 | """ 42 | all_video = [] 43 | for cam in tqdm(chosen_camera_id): 44 | video_path = f"{ZJU_PATH}/{person_id}/videos/{cam:0>2}.mp4" 45 | video = cv2.VideoCapture(video_path) 46 | frames = [] 47 | while True: 48 | ret, frame = video.read() 49 | if not ret: 50 | break 51 | frame = frame[:crop_size, :crop_size] 52 | frame = cv2.resize(frame, (save_size, save_size), interpolation=cv2.INTER_CUBIC) 53 | frames.append(frame[:, :, ::-1]) 54 | frames = np.array(frames) 55 | all_video.append(frames) 56 | video_len = np.array([video.shape[0] for video in all_video]) 57 | assert (video_len == video_len[0]).all() 58 | frame_id = [np.arange(video_len[0]) for _ in range(NUM_CAMERA)] 59 | frame_id = np.stack(frame_id, axis=0) 60 | 61 | all_video = np.stack(all_video, axis=0) 62 | 63 | camera_id = [np.ones(video_len[0], dtype="int") * (cam - 1) for cam in chosen_camera_id] 64 | camera_id = np.stack(camera_id, axis=0) 65 | 66 | return all_video, frame_id, camera_id, video_len[0] 67 | 68 | 69 | class DetectPerson: 70 | def __init__(self): 71 | self.detector = PersonDetector() 72 | 73 | def __call__(self, all_video: np.ndarray): 74 | """ 75 | 76 | Args: 77 | all_video: 78 | 79 | Returns: 80 | 81 | """ 82 | detected = self.detector.run_on_video(all_video) 83 | 84 | return detected 85 | 86 | 87 | def read_intrinsic(person_id: int, save_scale: float) -> np.ndarray: 88 | """ 89 | 90 | Args: 91 | person_id: 92 | save_scale: 93 | 94 | Returns: 95 | 96 | """ 97 | fs = cv2.FileStorage(f"{ZJU_PATH}/{person_id}/intri.yml", cv2.FILE_STORAGE_READ) 98 | all_intrinsic = [] 99 | for cam in range(1, NUM_CAMERA + 1): 100 | matrix = fs.getNode(f"K_{cam:0>2}").mat() 101 | matrix = np.array(matrix).reshape(3, 3) 102 | all_intrinsic.append(matrix) 103 | all_intrinsic = np.array(all_intrinsic) 104 | all_intrinsic[:, :2] /= save_scale 105 | 106 | return all_intrinsic 107 | 108 | 109 | def read_extrinsic(person_id: int) -> Tuple[np.ndarray, np.ndarray]: 110 | """ 111 | 112 | Args: 113 | person_id: 114 | 115 | Returns: 116 | 117 | """ 118 | fs = cv2.FileStorage(f"{ZJU_PATH}/{person_id}/extri.yml", cv2.FILE_STORAGE_READ) 119 | all_rot = [] 120 | all_trans = [] 121 | for cam in range(1, NUM_CAMERA + 1): 122 | rot = fs.getNode(f"Rot_{cam:0>2}").mat() 123 | rot = np.array(rot).reshape(3, 3) 124 | trans = fs.getNode(f"T_{cam:0>2}").mat() 125 | trans = np.array(trans).reshape(3, 1) 126 | all_rot.append(rot) 127 | 
all_trans.append(trans) 128 | all_rot = np.array(all_rot) 129 | all_trans = np.array(all_trans) 130 | 131 | return all_rot, all_trans 132 | 133 | 134 | def read_smpl_parameters(person_id: int, video_len: int) -> np.ndarray: 135 | """ 136 | 137 | Args: 138 | person_id: 139 | video_len: 140 | 141 | Returns: 142 | 143 | """ 144 | all_smpl_param = [] 145 | for frame_id in tqdm(range(video_len)): 146 | smpl_path = f"{ZJU_PATH}/{person_id}/smplx/{frame_id:0>6}.json" 147 | 148 | with open(smpl_path, "r") as f: 149 | smpl_param = json.load(f)[0] 150 | 151 | smpl_param = pose_loader(smpl_param) 152 | all_smpl_param.append(smpl_param) 153 | all_smpl_param = np.array(all_smpl_param) 154 | 155 | return all_smpl_param 156 | 157 | 158 | def read_smpl_verts(person_id: int, smpllayer: SMPLlayer) -> np.ndarray: 159 | """ 160 | 161 | Args: 162 | person_id: 163 | smpllayer: 164 | 165 | Returns: 166 | 167 | """ 168 | all_smpl_verts = [] 169 | smpl_paths = sorted(glob.glob(f"{ZJU_PATH}/{person_id}/smplx/*.json")) 170 | for frame_id in tqdm(range(len(smpl_paths))): 171 | smpl_path = smpl_paths[frame_id] 172 | 173 | with open(smpl_path, "r") as f: 174 | smpl_param = json.load(f)[0] 175 | with torch.no_grad(): 176 | Rh = torch.tensor(np.array(smpl_param["Rh"])).float() # 1 x 3 177 | Th = torch.tensor(np.array(smpl_param["Th"])).float() # 1 x 3 178 | poses = torch.tensor(np.array(smpl_param["poses"])).float() # 1 x 72 179 | shapes = torch.tensor(smpl_param["shapes"]).float() # 1 x 10 180 | expression = torch.tensor(smpl_param["expression"]).float() # 1 x 10 181 | verts = smpllayer(poses, shapes, Rh, Th, expression) 182 | 183 | all_smpl_verts.append(verts[0].cpu().numpy()) 184 | all_smpl_verts = np.array(all_smpl_verts) 185 | 186 | return all_smpl_verts # (video_len, n_verts, 3) 187 | 188 | 189 | def create_dict(video: np.ndarray, mask: List[np.ndarray], frame: np.ndarray, camera: np.ndarray, 190 | all_intrinsic: np.ndarray, all_rot: np.ndarray, all_trans: np.ndarray, smpl: np.ndarray, set_size: int 191 | ) -> Dict[str, np.ndarray]: 192 | """ 193 | 194 | Args: 195 | video: 196 | mask: 197 | frame: 198 | camera: 199 | all_intrinsic: 200 | all_rot: 201 | all_trans: 202 | smpl: 203 | set_size: 204 | 205 | Returns: 206 | 207 | """ 208 | data_dict = {} 209 | data_dict["frame_id"] = frame.reshape(-1) 210 | data_dict["img"] = np.array([blosc.pack_array(frame.transpose(2, 0, 1)) for frame in tqdm(video)], 211 | dtype="object") 212 | 213 | data_dict["mask"] = np.array([blosc.pack_array(det[:, :, 0]) for det in tqdm(mask)], 214 | dtype="object") 215 | data_dict["camera_intrinsic"] = all_intrinsic[camera] 216 | data_dict["camera_rotation"] = all_rot[camera] 217 | data_dict["camera_translation"] = all_trans[camera] 218 | 219 | data_dict["camera_id"] = np.arange(len(frame)) // set_size 220 | data_dict["smpl_pose"] = smpl 221 | 222 | return data_dict 223 | 224 | 225 | def process_train_set(person_id: int, all_video, all_intrinsic: np.ndarray, all_rot: np.ndarray, all_trans: np.ndarray, 226 | all_smpl_param: np.ndarray, frame_id, camera_id, video_len: int, train_set_rate: float) -> int: 227 | """ 228 | 229 | Args: 230 | person_id: 231 | all_video: 232 | all_intrinsic: 233 | all_rot: 234 | all_trans: 235 | all_smpl_param: 236 | frame_id: 237 | camera_id: 238 | video_len: 239 | train_set_rate: 240 | 241 | Returns: 242 | 243 | """ 244 | train_set_size = int(video_len * train_set_rate) 245 | train_video = all_video[TRAIN_CAMERA_ID - 1, :train_set_size].reshape(-1, *all_video.shape[2:]) 246 | train_frame = frame_id[TRAIN_CAMERA_ID - 
1, :train_set_size].reshape(-1, *frame_id.shape[2:]) 247 | train_camera = camera_id[TRAIN_CAMERA_ID - 1, :train_set_size].reshape(-1, *camera_id.shape[2:]) 248 | train_mask = person_detector(train_video) 249 | 250 | train_dict = create_dict(train_video, train_mask, train_frame, train_camera, all_intrinsic, 251 | all_rot, all_trans, all_smpl_param, train_set_size) 252 | 253 | with open(f'{ZJU_PATH}/cache{SAVE_SIZE}/{person_id}/cache_train.pickle', 'wb') as f: 254 | pickle.dump(train_dict, f) 255 | 256 | print("person id:", person_id, "train set size", train_set_size) 257 | 258 | return train_set_size 259 | 260 | 261 | def process_test_set(person_id: int, train_set_size: int, test_set_size: int, video_len: int, all_video: np.ndarray, 262 | all_intrinsic: np.ndarray, all_rot: np.ndarray, all_trans: np.ndarray, 263 | all_smpl_param: np.ndarray, frame_id: np.ndarray, camera_id: np.ndarray, mode: str) -> None: 264 | """ 265 | 266 | Args: 267 | person_id: 268 | train_set_size: 269 | test_set_size: 270 | video_len: 271 | all_video: 272 | all_intrinsic: 273 | all_rot: 274 | all_trans: 275 | all_smpl_param: 276 | frame_id: 277 | camera_id: 278 | mode: 279 | 280 | Returns: 281 | 282 | """ 283 | if mode == "novel_view": 284 | test_frame_id = np.linspace(0, train_set_size - 1, test_set_size).astype("int") 285 | camera_idx = TEST_CAMERA_ID 286 | cache_name = "cache_test" 287 | elif mode == "novel_pose": 288 | test_frame_id = np.linspace(train_set_size, video_len - 1, test_set_size).astype("int") 289 | camera_idx = ALL_CAMERA_ID 290 | cache_name = "cache_novel_pose" 291 | else: 292 | raise ValueError() 293 | 294 | test_video = all_video[camera_idx - 1][:, test_frame_id].reshape(-1, *all_video.shape[2:]) 295 | test_frame = frame_id[camera_idx - 1][:, test_frame_id].reshape(-1, *frame_id.shape[2:]) 296 | test_camera = camera_id[camera_idx - 1][:, test_frame_id].reshape(-1, *camera_id.shape[2:]) 297 | test_mask = person_detector(test_video) 298 | 299 | test_dict = create_dict(test_video, test_mask, test_frame, test_camera, all_intrinsic, 300 | all_rot, all_trans, all_smpl_param, test_set_size) 301 | 302 | with open(f'{ZJU_PATH}/cache{SAVE_SIZE}/{person_id}/{cache_name}.pickle', 'wb') as f: 303 | pickle.dump(test_dict, f) 304 | 305 | 306 | def main(): 307 | person_ids = args.person_id 308 | train_set_rate = 0.8 309 | test_set_size = 20 310 | 311 | for person_id in person_ids: 312 | # smpl verts 313 | all_verts = read_smpl_verts(person_id, smpllayer) 314 | data_dict = {"smpl_verts": all_verts} 315 | os.makedirs(f'{ZJU_PATH}/cache{SAVE_SIZE}/{person_id}', exist_ok=True) 316 | with open(f'{ZJU_PATH}/cache{SAVE_SIZE}/{person_id}/smpl_verts.pickle', 'wb') as f: 317 | pickle.dump(data_dict, f) 318 | 319 | # read frame 320 | all_video, frame_id, camera_id, video_len = read_frames(person_id, SAVE_SIZE, CROP_SIZE, ALL_CAMERA_ID) 321 | all_smpl_param = read_smpl_parameters(person_id, video_len) 322 | all_smpl_param = all_smpl_param[:, :23] 323 | all_intrinsic = read_intrinsic(person_id, SAVE_SCALE) 324 | all_rot, all_trans = read_extrinsic(person_id) 325 | 326 | # train set 327 | train_set_size = process_train_set(person_id, all_video, all_intrinsic, all_rot, all_trans, all_smpl_param, 328 | frame_id, camera_id, video_len, train_set_rate) 329 | 330 | # novel view 331 | process_test_set(person_id, train_set_size, test_set_size, video_len, all_video, all_intrinsic, all_rot, 332 | all_trans, all_smpl_param, frame_id, camera_id, "novel_view") 333 | 334 | # novel pose 335 | process_test_set(person_id, train_set_size, 
test_set_size, video_len, all_video, all_intrinsic, all_rot, 336 | all_trans, all_smpl_param, frame_id, camera_id, "novel_pose") 337 | 338 | 339 | if __name__ == "__main__": 340 | parser = argparse.ArgumentParser(description='ZJU data preprocessing') 341 | parser.add_argument('--smpl_model_path', type=str, required=True) 342 | parser.add_argument('--zju_path', type=str, required=True) 343 | parser.add_argument('--person_id', action='append', 344 | required=True) 345 | args = parser.parse_args() 346 | 347 | SMPL_MODEL_PATH = args.smpl_model_path 348 | 349 | ZJU_PATH = args.zju_path 350 | SAVE_SCALE = 2 351 | CROP_SIZE = 1024 352 | NUM_CAMERA = 23 353 | SAVE_SIZE = CROP_SIZE // SAVE_SCALE 354 | 355 | TRAIN_CAMERA_ID = np.array([1, 5, 9, 13, 17, 21]) 356 | TEST_CAMERA_ID = np.array([2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19, 20, 22, 23]) 357 | ALL_CAMERA_ID = np.arange(1, NUM_CAMERA + 1) 358 | 359 | pose_loader = PoseLoader(SMPL_MODEL_PATH) 360 | smpllayer = SMPLlayer(SMPL_MODEL_PATH + "/smplx", model_type='smplx') 361 | person_detector = DetectPerson() 362 | 363 | main() 364 | -------------------------------------------------------------------------------- /data_preprocess/zju/read_smpl.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | from typing import Any 12 | 13 | import cv2 14 | import numpy as np 15 | import torch 16 | from easymocap.smplmodel import load_model 17 | 18 | from EasyMocap.easymocap.smplmodel.lbs import lbs as extract_bone 19 | 20 | 21 | class PoseLoader: 22 | def __init__(self, smpl_model_path: str): 23 | """ 24 | 25 | Args: 26 | smpl_model_path: 27 | """ 28 | self.body_model = load_model( 29 | gender="neutral", 30 | model_type="smplx", 31 | model_path=smpl_model_path, 32 | device="cpu") 33 | 34 | def __call__(self, smpl_param: Any) -> np.ndarray: 35 | """ 36 | 37 | Args: 38 | smpl_param: 39 | 40 | Returns: 41 | 42 | """ 43 | Rh = np.array(smpl_param["Rh"]) # 1 x 3 44 | Th = np.array(smpl_param["Th"]) # 1 x 3 45 | poses = np.array(smpl_param["poses"])[:, :72] # 1 x 72 46 | shapes = smpl_param["shapes"] # 1 x 10 47 | expression = smpl_param["expression"] # 1 x 10 48 | 49 | shapes = torch.tensor(shapes).float() 50 | expression = torch.tensor(expression).float() 51 | shapes = torch.cat([shapes, expression], dim=1) 52 | poses = torch.tensor(poses).float() 53 | v_template = self.body_model.j_v_template 54 | joints, transformation = extract_bone(shapes, poses, v_template, 55 | self.body_model.j_shapedirs, None, 56 | self.body_model.j_J_regressor, self.body_model.parents, 57 | None, dtype=self.body_model.dtype, 58 | use_pose_blending=False) 59 | bone_pose = transformation.clone() 60 | bone_pose[:, :, :3, 3] = joints 61 | 62 | trans = np.eye(4) 63 | trans[:3, :3] = cv2.Rodrigues(Rh[0])[0] 64 | trans[:3, 3] = Th 65 | 66 | bone_pose_world = np.matmul(trans, bone_pose.numpy()[0]) 67 | 68 | return bone_pose_world 69 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: watch_it_move 2 | channels: 3 | - pytorch 4 | - defaults 5 | - conda-forge 6 | dependencies: 7 | - python=3.9.12 8 | - pip==22.0.4 9 | - cudatoolkit=11.1.1 10 | - easydict=1.9 11 | - ninja=1.10.2 12 | - numpy=1.21.5 13 | - pytorch=1.10.1 14 | - torchvision=0.11.2 15 | - pip: 16 | - blosc==1.10.6 17 | - lpips==0.1.4 18 | - matplotlib==3.5.1 19 | - opencv-contrib-python==4.5.5.64 20 | - opencv-python==4.5.5.64 21 | - scikit-image==0.19.2 22 | - smplx==0.1.28 23 | - tensorboardx==2.5 24 | - cython==0.29.28 25 | - timm==0.5.4 26 | - h5py==3.6.0 27 | - submitit==1.4.2 28 | - gdown==4.4.0 29 | -------------------------------------------------------------------------------- /figures/reconstruction_rotate_motion_0_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/watch-it-move/5fdec2b71b07f9d4a5492fb3dad6bfcc7d9a9f8b/figures/reconstruction_rotate_motion_0_0.gif -------------------------------------------------------------------------------- /figures/repose_rotate_0_0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/watch-it-move/5fdec2b71b07f9d4a5492fb3dad6bfcc7d9a9f8b/figures/repose_rotate_0_0.gif -------------------------------------------------------------------------------- /figures/robot_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/watch-it-move/5fdec2b71b07f9d4a5492fb3dad6bfcc7d9a9f8b/figures/robot_example.jpg -------------------------------------------------------------------------------- /install.sh: 
-------------------------------------------------------------------------------- 1 | # Create the conda environment 2 | 3 | conda env create --file environment.yml 4 | eval "$(conda shell.bash hook)" 5 | conda activate watch_it_move 6 | pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html 7 | pip install "git+https://github.com/facebookresearch/pytorch3d.git@v0.6.2" 8 | 9 | # Initialize the submodules 10 | 11 | cd data_preprocess/zju/EasyMocap 12 | python setup.py develop 13 | # Patch lbs.py to expose a local variable we need use 14 | patch -p1 < ../diff.patch 15 | cd ../../.. 16 | 17 | cd AdelaiDet 18 | python setup.py build develop 19 | cd .. 20 | 21 | cd Mask2Former/mask2former/modeling/pixel_decoder/ops 22 | sh make.sh 23 | -------------------------------------------------------------------------------- /src/confs/atlas.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: atlas 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/atlas" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 20 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 384 75 | -------------------------------------------------------------------------------- /src/confs/atlas_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: atlas_merge 11 | 12 | resume_model_path: ../data/output/result/atlas/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/atlas" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 20 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 384 78 | -------------------------------------------------------------------------------- /src/confs/baxter.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: baxter 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/baxter" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 20 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 
34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 384 75 | -------------------------------------------------------------------------------- /src/confs/baxter_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: baxter_merge 11 | 12 | resume_model_path: ../data/output/result/baxter/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/baxter" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 20 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 384 78 | -------------------------------------------------------------------------------- /src/confs/cassie.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: cassie 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/cassie" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 15 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 512 75 | -------------------------------------------------------------------------------- /src/confs/cassie_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: cassie_merge 11 | 12 | resume_model_path: ../data/output/result/cassie/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/cassie" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 15 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 
36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 512 78 | -------------------------------------------------------------------------------- /src/confs/default.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: 10 | exp_name: 11 | 12 | fp16: false 13 | resume_model_path: 14 | load_optimizer: true 15 | iteration: 16 | 17 | dataset: 18 | data_root: "" 19 | batchsize: 4 20 | size: 256 21 | set_name: 22 | num_parts: 24 23 | num_workers: 2 # dataloader 24 | n_repetition_in_epoch: 1000 25 | coordinate_scale: 1500 26 | num_frames: 27 | num_view: 28 | prob_sample_latest: 0.2 29 | thin_out_interval: 1 30 | background_color: -1 31 | compression: True 32 | 33 | test_dataset: 34 | data_root: "" 35 | batchsize: 4 36 | size: 256 37 | num_parts: 24 38 | num_workers: 2 # dataloader 39 | n_repetition_in_epoch: 1 40 | coordinate_scale: 1500 41 | 42 | network_params: 43 | pixel_sampler: uniform 44 | decoder_params: 45 | hidden_dim: 256 46 | n_power: 6 47 | num_layers: 8 48 | sdf_residual_range: 0.05 49 | child_root: [ [ -0.75, 0, 0 ], [ 0, 0.75, 0 ], [ 0, -0.75, 0 ], 50 | [ 0, 0, 0.75 ], [ 0, 0, -0.75 ], [ 0.75, 0, 0 ] ] 51 | sdf_scale: 100 52 | initial_sdf_weight: 1. 
53 | residual_sdf: true 54 | trajectory_params: 55 | dct: 56 | hidden_dim: 256 57 | n_mlp: 4 58 | k: 200 59 | n_split: 1 60 | surface_loss: false 61 | structure_loss: false 62 | center_coef_for_structure_loss: 0 63 | 64 | loss_params: 65 | mask_loss_multiplier: 1 66 | surface_loss_coef: 0 67 | structure_loss_coef: 0 68 | initial_structure_loss_coef: 0 69 | max_structure_loss_coef: 0 70 | joint_2d_loss_coef: 0 71 | joint_3d_separation_loss_coef: 0 72 | sdf_loss_coef: 0 73 | pull_rigid_parts_loss_coef: 0 74 | 75 | train_setting: 76 | num_iter: 100000 77 | optimizer: Adam 78 | lr: 0.001 79 | decay: 0 80 | clip_grad: false 81 | scheduler_gamma: 1 82 | resume: False 83 | 84 | val_interval: 5000 85 | save_interval: 5000 86 | log_interval: 100 87 | 88 | # for DDP 89 | master_addr: localhost 90 | master_port: '12355' 91 | backend: nccl 92 | 93 | dataset_schedule_type: "incremental" # incremental 94 | frame_schedule: 95 | incremental: 96 | initial_frame: 2932 97 | start: 98 | incremental_period: 99 | 100 | 101 | render_setting: 102 | num_ray: 100 103 | -------------------------------------------------------------------------------- /src/confs/dog.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: dog 11 | 12 | 13 | dataset: 14 | data_root: "../data/rgbd_dog/dog1/motion_testSeq" 15 | batchsize: 16 16 | size: 512 17 | set_name: 18 | num_parts: 20 19 | num_workers: 0 # dataloader 20 | num_frames: 601 21 | num_view: 8 22 | coordinate_scale: 1000 23 | prob_sample_latest: 0.2 24 | thin_out_interval: 1 25 | background_color: -1 26 | 27 | network_params: 28 | pixel_sampler: foreground 29 | decoder_params: 30 | sdf_residual_range: 0.02 31 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 32 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 33 | sdf_scale: 600 34 | initial_sdf_weight: 30. 
35 | trajectory_params: 36 | dct: 37 | hidden_dim: 256 38 | n_mlp: 4 39 | k: 50 40 | n_split: 1 41 | surface_loss: true 42 | structure_loss: true 43 | center_coef_for_structure_loss: 0.02 44 | 45 | loss_params: 46 | mask_loss_multiplier: 1 47 | surface_loss_coef: 600 48 | structure_loss_coef: 2 49 | initial_structure_loss_coef: 2 50 | max_structure_loss_coef: 50 51 | joint_2d_loss_coef: 0 52 | joint_3d_separation_loss_coef: 1 53 | sdf_loss_coef: 0.2 54 | 55 | train_setting: 56 | num_iter: 200000 57 | optimizer: AdamW 58 | lr: 0.0003 59 | decay: 0.005 60 | clip_grad: true 61 | scheduler_gamma: 0.99995 62 | resume: False 63 | 64 | val_interval: 5000 65 | save_interval: 500 66 | log_interval: 100 67 | 68 | frame_schedule: 69 | incremental: 70 | initial_frame: 10 71 | start: 10000 72 | incremental_period: 70000 73 | 74 | render_setting: 75 | num_ray: 384 76 | -------------------------------------------------------------------------------- /src/confs/dog_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: dog_merge 11 | 12 | resume_model_path: ../data/output/result/dog/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/rgbd_dog/dog1/motion_testSeq" 17 | batchsize: 16 18 | size: 512 19 | set_name: 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 601 23 | num_view: 8 24 | coordinate_scale: 1000 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [ [ -0.75, 0, 0 ], [ 0, 0.75, 0 ], [ 0, -0.75, 0 ], 34 | [ 0, 0, 0.75 ], [ 0, 0, -0.75 ], [ 0.75, 0, 0 ] ] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 0 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/iiwa.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: iiwa 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/iiwa" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 8 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 512 75 | -------------------------------------------------------------------------------- /src/confs/iiwa_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: iiwa_merge 11 | 12 | resume_model_path: ../data/output/result/iiwa/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/iiwa" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 8 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 
36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 512 78 | -------------------------------------------------------------------------------- /src/confs/nao.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: nao 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/nao" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 20 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 0.333 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 600 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 0 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.2 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 384 75 | -------------------------------------------------------------------------------- /src/confs/nao_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: nao_merge 11 | 12 | resume_model_path: ../data/output/result/nao/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | 16 | dataset: 17 | data_root: "../data/robots/nao" 18 | batchsize: 16 19 | size: 512 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 300 23 | num_view: 5 24 | coordinate_scale: 0.333 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: 1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 30 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 1 51 | initial_structure_loss_coef: 1 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 0 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/pandas.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: pandas 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/pandas" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 10 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 
34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 512 75 | -------------------------------------------------------------------------------- /src/confs/pandas_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: pandas_merge 11 | 12 | resume_model_path: ../data/output/result/pandas/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/pandas" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 10 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 512 78 | -------------------------------------------------------------------------------- /src/confs/spot.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: spot 11 | 12 | 13 | dataset: 14 | data_root: "../data/robots/spot" 15 | batchsize: 16 16 | size: 512 17 | num_parts: 15 18 | num_workers: 0 # dataloader 19 | num_frames: 300 20 | num_view: 5 21 | coordinate_scale: 1 22 | prob_sample_latest: 0.2 23 | thin_out_interval: 1 24 | background_color: 1 25 | 26 | network_params: 27 | pixel_sampler: foreground 28 | decoder_params: 29 | sdf_residual_range: 0.02 30 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 31 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 32 | sdf_scale: 600 33 | initial_sdf_weight: 30. 34 | trajectory_params: 35 | dct: 36 | hidden_dim: 256 37 | n_mlp: 4 38 | k: 30 39 | n_split: 1 40 | surface_loss: true 41 | structure_loss: true 42 | center_coef_for_structure_loss: 0.02 43 | 44 | loss_params: 45 | mask_loss_multiplier: 1 46 | surface_loss_coef: 400 47 | structure_loss_coef: 1 48 | initial_structure_loss_coef: 1 49 | max_structure_loss_coef: 50 50 | joint_2d_loss_coef: 1000 51 | joint_3d_separation_loss_coef: 1 52 | sdf_loss_coef: 0.02 53 | 54 | train_setting: 55 | num_iter: 200000 56 | optimizer: AdamW 57 | lr: 0.0003 58 | decay: 0.005 59 | clip_grad: true 60 | scheduler_gamma: 0.99995 61 | resume: False 62 | 63 | val_interval: 5000 64 | save_interval: 500 65 | log_interval: 100 66 | 67 | frame_schedule: 68 | incremental: 69 | initial_frame: 10 70 | start: 10000 71 | incremental_period: 70000 72 | 73 | render_setting: 74 | num_ray: 512 75 | -------------------------------------------------------------------------------- /src/confs/spot_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: spot_merge 11 | 12 | resume_model_path: ../data/output/result/spot/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/robots/spot" 17 | batchsize: 16 18 | size: 512 19 | num_parts: 15 20 | num_workers: 0 # dataloader 21 | num_frames: 300 22 | num_view: 5 23 | coordinate_scale: 1 24 | prob_sample_latest: 0.2 25 | thin_out_interval: 1 26 | background_color: 1 27 | 28 | network_params: 29 | pixel_sampler: foreground 30 | decoder_params: 31 | sdf_residual_range: 0.02 32 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 33 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 34 | sdf_scale: 600 35 | initial_sdf_weight: 30. 
36 | trajectory_params: 37 | dct: 38 | hidden_dim: 256 39 | n_mlp: 4 40 | k: 30 41 | n_split: 1 42 | surface_loss: true 43 | structure_loss: true 44 | center_coef_for_structure_loss: 0.02 45 | 46 | loss_params: 47 | mask_loss_multiplier: 1 48 | surface_loss_coef: 400 49 | structure_loss_coef: 1 50 | initial_structure_loss_coef: 1 51 | max_structure_loss_coef: 50 52 | joint_2d_loss_coef: 1000 53 | joint_3d_separation_loss_coef: 1 54 | sdf_loss_coef: 0.02 55 | pull_rigid_parts_loss_coef: 5 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 512 78 | -------------------------------------------------------------------------------- /src/confs/zju366.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju366 11 | 12 | 13 | dataset: 14 | data_root: "../data/zju_mocap/cache512/366" 15 | batchsize: 16 16 | size: 512 17 | set_name: train 18 | num_parts: 20 19 | num_workers: 0 # dataloader 20 | num_frames: 623 21 | num_view: 6 22 | coordinate_scale: 1.5 23 | prob_sample_latest: 0.2 24 | thin_out_interval: 1 25 | background_color: -1 26 | 27 | network_params: 28 | pixel_sampler: foreground 29 | decoder_params: 30 | sdf_residual_range: 0.02 31 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 32 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 33 | sdf_scale: 600 34 | initial_sdf_weight: 30. 35 | trajectory_params: 36 | dct: 37 | hidden_dim: 256 38 | n_mlp: 4 39 | k: 50 40 | n_split: 1 41 | surface_loss: true 42 | structure_loss: true 43 | center_coef_for_structure_loss: 0.02 44 | 45 | loss_params: 46 | mask_loss_multiplier: 1 47 | surface_loss_coef: 600 48 | structure_loss_coef: 2 49 | initial_structure_loss_coef: 2 50 | max_structure_loss_coef: 50 51 | joint_2d_loss_coef: 1000 52 | joint_3d_separation_loss_coef: 1 53 | sdf_loss_coef: 0.2 54 | 55 | train_setting: 56 | num_iter: 200000 57 | optimizer: AdamW 58 | lr: 0.0003 59 | decay: 0.005 60 | clip_grad: true 61 | scheduler_gamma: 0.99995 62 | resume: False 63 | 64 | val_interval: 5000 65 | save_interval: 500 66 | log_interval: 100 67 | 68 | frame_schedule: 69 | incremental: 70 | initial_frame: 10 71 | start: 10000 72 | incremental_period: 70000 73 | 74 | render_setting: 75 | num_ray: 384 76 | -------------------------------------------------------------------------------- /src/confs/zju366_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju366_merge 11 | 12 | resume_model_path: ../data/output/result/zju366/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/366" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 623 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/zju377.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju377 11 | 12 | 13 | dataset: 14 | data_root: "../data/zju_mocap/cache512/377" 15 | batchsize: 16 16 | size: 512 17 | set_name: train 18 | num_parts: 20 19 | num_workers: 0 # dataloader 20 | num_frames: 493 21 | num_view: 6 22 | coordinate_scale: 1.5 23 | prob_sample_latest: 0.2 24 | thin_out_interval: 1 25 | background_color: -1 26 | 27 | network_params: 28 | pixel_sampler: foreground 29 | decoder_params: 30 | sdf_residual_range: 0.02 31 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 32 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 33 | sdf_scale: 600 34 | initial_sdf_weight: 30. 
35 | trajectory_params: 36 | dct: 37 | hidden_dim: 256 38 | n_mlp: 4 39 | k: 50 40 | n_split: 1 41 | surface_loss: true 42 | structure_loss: true 43 | center_coef_for_structure_loss: 0.02 44 | 45 | loss_params: 46 | mask_loss_multiplier: 1 47 | surface_loss_coef: 600 48 | structure_loss_coef: 2 49 | initial_structure_loss_coef: 2 50 | max_structure_loss_coef: 50 51 | joint_2d_loss_coef: 1000 52 | joint_3d_separation_loss_coef: 1 53 | sdf_loss_coef: 0.2 54 | 55 | train_setting: 56 | num_iter: 200000 57 | optimizer: AdamW 58 | lr: 0.0003 59 | decay: 0.005 60 | clip_grad: true 61 | scheduler_gamma: 0.99995 62 | resume: False 63 | 64 | val_interval: 5000 65 | save_interval: 500 66 | log_interval: 100 67 | 68 | frame_schedule: 69 | incremental: 70 | initial_frame: 10 71 | start: 10000 72 | incremental_period: 70000 73 | 74 | render_setting: 75 | num_ray: 384 76 | -------------------------------------------------------------------------------- /src/confs/zju377_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju377_merge 11 | 12 | resume_model_path: ../data/output/result/zju377/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/377" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 493 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 
37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/zju381.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju381 11 | 12 | resume_model_path: ../data/output/result/zju381_str25/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/381" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 500 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 
37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | 57 | train_setting: 58 | num_iter: 200000 59 | optimizer: AdamW 60 | lr: 0.0003 61 | decay: 0.005 62 | clip_grad: true 63 | scheduler_gamma: 0.99995 64 | resume: False 65 | 66 | val_interval: 5000 67 | save_interval: 500 68 | log_interval: 100 69 | 70 | frame_schedule: 71 | incremental: 72 | initial_frame: 10 73 | start: 10000 74 | incremental_period: 70000 75 | 76 | render_setting: 77 | num_ray: 384 78 | -------------------------------------------------------------------------------- /src/confs/zju381_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju381_merge 11 | 12 | resume_model_path: ../data/output/result/zju381/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/381" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 500 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 
37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/zju384.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju384 11 | 12 | 13 | dataset: 14 | data_root: "../data/zju_mocap/cache512/384" 15 | batchsize: 16 16 | size: 512 17 | set_name: train 18 | num_parts: 20 19 | num_workers: 0 # dataloader 20 | num_frames: 756 21 | num_view: 6 22 | coordinate_scale: 1.5 23 | prob_sample_latest: 0.2 24 | thin_out_interval: 1 25 | background_color: -1 26 | 27 | network_params: 28 | pixel_sampler: foreground 29 | decoder_params: 30 | sdf_residual_range: 0.02 31 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 32 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 33 | sdf_scale: 600 34 | initial_sdf_weight: 30. 35 | trajectory_params: 36 | dct: 37 | hidden_dim: 256 38 | n_mlp: 4 39 | k: 50 40 | n_split: 1 41 | surface_loss: true 42 | structure_loss: true 43 | center_coef_for_structure_loss: 0.02 44 | 45 | loss_params: 46 | mask_loss_multiplier: 1 47 | surface_loss_coef: 600 48 | structure_loss_coef: 2 49 | initial_structure_loss_coef: 2 50 | max_structure_loss_coef: 50 51 | joint_2d_loss_coef: 1000 52 | joint_3d_separation_loss_coef: 1 53 | sdf_loss_coef: 0.2 54 | 55 | train_setting: 56 | num_iter: 200000 57 | optimizer: AdamW 58 | lr: 0.0003 59 | decay: 0.005 60 | clip_grad: true 61 | scheduler_gamma: 0.99995 62 | resume: False 63 | 64 | val_interval: 5000 65 | save_interval: 500 66 | log_interval: 100 67 | 68 | frame_schedule: 69 | incremental: 70 | initial_frame: 10 71 | start: 10000 72 | incremental_period: 70000 73 | 74 | render_setting: 75 | num_ray: 384 76 | -------------------------------------------------------------------------------- /src/confs/zju384_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju384_merge 11 | 12 | resume_model_path: ../data/output/result/zju384/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/384" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 756 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/confs/zju387.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju387 11 | 12 | 13 | dataset: 14 | data_root: "../data/zju_mocap/cache512/387" 15 | batchsize: 16 16 | size: 512 17 | set_name: train 18 | num_parts: 20 19 | num_workers: 0 # dataloader 20 | num_frames: 523 21 | num_view: 6 22 | coordinate_scale: 1.5 23 | prob_sample_latest: 0.2 24 | thin_out_interval: 1 25 | background_color: -1 26 | 27 | network_params: 28 | pixel_sampler: foreground 29 | decoder_params: 30 | sdf_residual_range: 0.02 31 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 32 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 33 | sdf_scale: 600 34 | initial_sdf_weight: 30. 
35 | trajectory_params: 36 | dct: 37 | hidden_dim: 256 38 | n_mlp: 4 39 | k: 50 40 | n_split: 1 41 | surface_loss: true 42 | structure_loss: true 43 | center_coef_for_structure_loss: 0.02 44 | 45 | loss_params: 46 | mask_loss_multiplier: 1 47 | surface_loss_coef: 600 48 | structure_loss_coef: 2 49 | initial_structure_loss_coef: 2 50 | max_structure_loss_coef: 50 51 | joint_2d_loss_coef: 1000 52 | joint_3d_separation_loss_coef: 1 53 | sdf_loss_coef: 0.2 54 | 55 | train_setting: 56 | num_iter: 200000 57 | optimizer: AdamW 58 | lr: 0.0003 59 | decay: 0.005 60 | clip_grad: true 61 | scheduler_gamma: 0.99995 62 | resume: False 63 | 64 | val_interval: 5000 65 | save_interval: 500 66 | log_interval: 100 67 | 68 | frame_schedule: 69 | incremental: 70 | initial_frame: 10 71 | start: 10000 72 | incremental_period: 70000 73 | 74 | render_setting: 75 | num_ray: 384 76 | -------------------------------------------------------------------------------- /src/confs/zju387_merge.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | output_dir: ../data/output 10 | exp_name: zju387_merge 11 | 12 | resume_model_path: ../data/output/result/zju387/snapshot_150000.pth 13 | load_optimizer: false 14 | 15 | dataset: 16 | data_root: "../data/zju_mocap/cache512/387" 17 | batchsize: 16 18 | size: 512 19 | set_name: train 20 | num_parts: 20 21 | num_workers: 0 # dataloader 22 | num_frames: 523 23 | num_view: 6 24 | coordinate_scale: 1.5 25 | prob_sample_latest: 0.2 26 | thin_out_interval: 1 27 | background_color: -1 28 | 29 | network_params: 30 | pixel_sampler: foreground 31 | decoder_params: 32 | sdf_residual_range: 0.02 33 | child_root: [[-0.75, 0, 0], [0, 0.75, 0], [0, -0.75, 0], 34 | [0, 0, 0.75], [0, 0, -0.75], [0.75, 0, 0]] 35 | sdf_scale: 600 36 | initial_sdf_weight: 30. 
37 | trajectory_params: 38 | dct: 39 | hidden_dim: 256 40 | n_mlp: 4 41 | k: 50 42 | n_split: 1 43 | surface_loss: true 44 | structure_loss: true 45 | center_coef_for_structure_loss: 0.02 46 | 47 | loss_params: 48 | mask_loss_multiplier: 1 49 | surface_loss_coef: 600 50 | structure_loss_coef: 2 51 | initial_structure_loss_coef: 2 52 | max_structure_loss_coef: 50 53 | joint_2d_loss_coef: 1000 54 | joint_3d_separation_loss_coef: 1 55 | sdf_loss_coef: 0.2 56 | pull_rigid_parts_loss_coef: 5 57 | 58 | train_setting: 59 | num_iter: 200000 60 | optimizer: AdamW 61 | lr: 0.0003 62 | decay: 0.005 63 | clip_grad: true 64 | scheduler_gamma: 0.99995 65 | resume: False 66 | 67 | val_interval: 5000 68 | save_interval: 500 69 | log_interval: 100 70 | 71 | frame_schedule: 72 | incremental: 73 | initial_frame: 10 74 | start: 10000 75 | incremental_period: 70000 76 | 77 | render_setting: 78 | num_ray: 384 79 | -------------------------------------------------------------------------------- /src/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | import pickle 13 | import random 14 | from typing import Dict, Any, Optional 15 | 16 | import blosc 17 | import numpy as np 18 | 19 | 20 | class SingleVideoDataset: 21 | def __init__(self, config: Dict[str, Any]) -> None: 22 | """ 23 | 24 | Args: 25 | config: 26 | """ 27 | self.config = config 28 | self.num_parts = config.num_parts 29 | self.num_frames = config.num_frames 30 | self.num_view = config.num_view 31 | self.img_size = config.size 32 | self.thin_out_interval = config.thin_out_interval 33 | self.return_neighboring_frames = config.return_neighboring_frames 34 | self.return_random_frames = config.return_random_frames 35 | self.compression = config.compression 36 | self.video_cache = self.cache_data(config.set_name) 37 | self.n_repetition_in_epoch = config.n_repetition_in_epoch 38 | self.coordinate_scale = config.coordinate_scale 39 | self.current_max_frame_id = self.num_frames // self.thin_out_interval 40 | self.current_min_frame_id = 0 41 | self.prob_sample_latest = config.prob_sample_latest 42 | self.background_color = config.background_color 43 | 44 | @staticmethod 45 | def seed(): 46 | np.random.seed() 47 | random.seed() 48 | 49 | def cache_data(self, set_name: Optional[str] = None) -> Dict: 50 | """ 51 | cache data into a dictionary of numpy array 52 | Args: 53 | set_name: 54 | 55 | Returns: 56 | video_cache (dict): cached data 57 | """ 58 | file_name = "cache.pickle" if set_name is None else f"cache_{set_name}.pickle" 59 | cache_path = f"{self.config.data_root}/{file_name}" 60 | with open(cache_path, "rb") as f: 61 | video_cache = pickle.load(f) 62 | 63 | return video_cache 64 | 65 | def __len__(self) -> int: 66 | return self.num_frames * self.num_view // self.thin_out_interval * \ 67 | self.n_repetition_in_epoch # number of frames 68 | 69 | def get_index(self, index: int) -> np.ndarray: 70 | """ 71 | 72 | Args: 73 | index: 74 
| 75 | Returns: 76 | 77 | """ 78 | num_frames = self.num_frames // self.thin_out_interval 79 | if self.current_max_frame_id >= num_frames: 80 | index = index // self.n_repetition_in_epoch 81 | else: 82 | current_max_frame_id = min(num_frames, self.current_max_frame_id) 83 | current_min_frame_id = self.current_min_frame_id 84 | if random.random() < self.prob_sample_latest: 85 | min_frame_id = max(0, current_max_frame_id - 6) 86 | frame_id = random.randint(min_frame_id, current_max_frame_id - 1) 87 | else: 88 | frame_id = random.randint(current_min_frame_id, current_max_frame_id - 1) 89 | camera_id = random.randint(0, self.num_view - 1) 90 | index = self.num_frames * camera_id + frame_id * self.thin_out_interval 91 | 92 | return index 93 | 94 | def __getitem__(self, index: int) -> dict: 95 | """ 96 | 97 | Args: 98 | index: 99 | 100 | Returns: 101 | 102 | """ 103 | self.seed() 104 | index = self.get_index(index) 105 | 106 | frame_id = self.video_cache["frame_id"][index] 107 | 108 | img = self.video_cache["img"][index] 109 | mask = self.video_cache["mask"][index] 110 | if self.compression: 111 | img = blosc.unpack_array(img) 112 | mask = blosc.unpack_array(mask) 113 | img = img / 127.5 - 1 114 | 115 | # remove background 116 | fg_mask = (mask == 1) # ignore unreliable pixels 117 | img = img * fg_mask + (1 - fg_mask) * self.background_color 118 | 119 | camera_rotation = self.video_cache["camera_rotation"][index] 120 | camera_translation = self.video_cache["camera_translation"][index] / self.coordinate_scale 121 | camera_id = self.video_cache["camera_id"][index] 122 | camera_intrinsic = self.video_cache["camera_intrinsic"][index] 123 | minibatch = {"frame_id": frame_id, 124 | "img": img.astype("float32"), 125 | "mask": mask.astype("float32"), 126 | "camera_rotation": camera_rotation.astype("float32"), 127 | "camera_translation": camera_translation.astype("float32"), 128 | "camera_id": camera_id, 129 | "camera_intrinsic": camera_intrinsic.astype("float32")} 130 | 131 | return minibatch 132 | -------------------------------------------------------------------------------- /src/train_single_video.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | from typing import Tuple, Dict, Any 13 | 14 | import torch 15 | from torch import nn 16 | 17 | from easydict import EasyDict as edict 18 | from models.loss import SupervisedLoss 19 | from models.model import SingleVideoPartDecomposition 20 | from utils.get_args import get_args 21 | from utils.train_utils import (create_optimizer, 22 | send_model_to_gpu) 23 | from utils.trainer import TrainerBase 24 | 25 | mse = nn.MSELoss() 26 | 27 | 28 | def loss_reconstruction_based(minibatch: dict, model: nn.Module, loss_func: SupervisedLoss, config: edict, 29 | pull_rigid_parts: bool = False) -> Tuple[torch.Tensor, Dict[str, float]]: 30 | """ 31 | 32 | Args: 33 | minibatch: 34 | model: 35 | loss_func: 36 | config: 37 | pull_rigid_parts: 38 | 39 | Returns: 40 | 41 | """ 42 | img = minibatch["img"] 43 | mask = minibatch["mask"] 44 | camera_rotation = minibatch["camera_rotation"] 45 | camera_translation = minibatch["camera_translation"] 46 | inv_intrinsics = torch.inverse(minibatch["camera_intrinsic"]) 47 | frame_id = minibatch["frame_id"] 48 | 49 | model_output_dict = model(frame_id, camera_rotation, camera_translation, 50 | inv_intrinsics, num_ray=config.render_setting.num_ray, mask=mask) 51 | 52 | loss, loss_dict = loss_func(img, mask, model_output_dict, pull_rigid_parts=pull_rigid_parts) 53 | 54 | if "sdf_grad" in model_output_dict and config.loss_params.sdf_loss_coef > 0: 55 | sdf_grad = model_output_dict["sdf_grad"] 56 | sdf_loss = mse(torch.norm(sdf_grad, dim=1), torch.ones_like(sdf_grad[:, 0])) 57 | loss += sdf_loss * config.loss_params.sdf_loss_coef 58 | loss_dict["sdf_loss"] = sdf_loss.item() 59 | 60 | return loss, loss_dict 61 | 62 | 63 | class Trainer(TrainerBase): 64 | def __init__(self): 65 | self.snapshot_prefix = "snapshot" 66 | self.only_update_joint = False 67 | self.pull_rigid_parts = False 68 | 69 | def prepare_model_and_optimizer(self, config: edict, rank: int, ddp: int) -> Tuple[nn.Module, nn.Module, Any]: 70 | """ 71 | 72 | Args: 73 | config: 74 | rank: 75 | ddp: 76 | 77 | Returns: 78 | 79 | """ 80 | self.config = config 81 | model = SingleVideoPartDecomposition(config.network_params) 82 | optimizer = create_optimizer(config.train_setting, model) # optimizer works locally, define before DDP_model 83 | 84 | model, model_module = send_model_to_gpu(rank, model, ddp) 85 | return model, model_module, optimizer 86 | 87 | def define_loss_func(self, config: edict, model_module: nn.Module, ddp: bool) -> None: 88 | """ 89 | 90 | Args: 91 | config: 92 | model_module: 93 | ddp: 94 | 95 | Returns: 96 | 97 | """ 98 | self.reconstruction_loss_func = SupervisedLoss(config.loss_params, model_module, ddp, coarse_rate=64) 99 | 100 | def process_incremental(self, schedule_config: edict, iteration: int) -> None: 101 | """ 102 | 103 | Args: 104 | schedule_config: 105 | iteration: 106 | 107 | Returns: 108 | 109 | """ 110 | initial_frame = schedule_config.initial_frame 111 | start = schedule_config.start 112 | incremental_period = schedule_config.incremental_period 113 | num_frames = self.config.dataset.num_frames // self.config.dataset.thin_out_interval 114 | 115 | if start is None: 116 | start = 1e10 117 | if incremental_period is None: 118 | incremental_period = 1e10 119 | self.train_loader.dataset.current_max_frame_id = \ 120 | int(initial_frame + min(max(0, iteration - start), incremental_period) * 121 | (num_frames - initial_frame) / incremental_period) 122 | self.model.current_max_frame_id = self.train_loader.dataset.current_max_frame_id 123 | 124 | loss_config = 
self.reconstruction_loss_func.config 125 | if loss_config.initial_structure_loss_coef > 0: 126 | if iteration > start: 127 | loss_config.structure_loss_coef = loss_config.max_structure_loss_coef 128 | else: 129 | loss_config.structure_loss_coef = loss_config.initial_structure_loss_coef * (1 - iteration / start) + \ 130 | loss_config.max_structure_loss_coef * (iteration / start) 131 | 132 | if iteration > start + incremental_period: 133 | self.pull_rigid_parts = True 134 | assert self.train_loader.num_workers == 0 135 | 136 | def process_before_train_step(self, iteration: int) -> None: 137 | """ 138 | 139 | Args: 140 | iteration: 141 | 142 | Returns: 143 | 144 | """ 145 | dataset_schedule_type = self.config.train_setting.dataset_schedule_type 146 | schedule_config = self.config.train_setting.frame_schedule[dataset_schedule_type] 147 | if dataset_schedule_type == "incremental": 148 | self.process_incremental(schedule_config, iteration) 149 | else: 150 | raise ValueError("Invalid dataset schedule type") 151 | 152 | def lossfunc(self, config: edict, minibatch: dict, model: nn.Module, model_module: nn.Module, 153 | pull_rigid_parts: bool = False) -> Tuple[torch.Tensor, dict]: 154 | """ 155 | 156 | Args: 157 | config: 158 | minibatch: 159 | model: 160 | model_module: 161 | pull_rigid_parts: 162 | 163 | Returns: 164 | 165 | """ 166 | loss_dict = {} 167 | 168 | # reconstruction branch 169 | recon_loss_func = self.reconstruction_loss_func 170 | loss, _loss_dict = loss_reconstruction_based(minibatch, model, recon_loss_func, config, 171 | pull_rigid_parts=self.pull_rigid_parts) 172 | loss_dict.update(_loss_dict) 173 | 174 | return loss, loss_dict 175 | 176 | 177 | if __name__ == "__main__": 178 | args, config = get_args() 179 | 180 | trainer = Trainer() 181 | trainer.run(config) 182 | -------------------------------------------------------------------------------- /src/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | import yaml 13 | from easydict import EasyDict as edict 14 | 15 | 16 | def check_config(config: edict): 17 | """ 18 | 19 | Args: 20 | config: 21 | 22 | Returns: 23 | 24 | """ 25 | if "temporal_consistency_loss_coef" in config.loss_params: 26 | assert (config.loss_params.temporal_consistency_loss_coef > 0) == config.network_params.temporal_consistency 27 | assert not config.network_params.temporal_consistency or (config.dataset.video_len > 1) 28 | 29 | if "surface_loss" in config.network_params: 30 | assert (config.loss_params.surface_loss_coef > 0) == config.network_params.surface_loss 31 | if "structure_loss" in config.network_params: 32 | assert (config.loss_params.structure_loss_coef > 0) == config.network_params.structure_loss 33 | 34 | if "transformation_equivariance" in config.network_params: 35 | assert ((config.loss_params.heatmap_2d_equivariance_loss_coef > 36 | 0) == config.network_params.transformation_equivariance) or \ 37 | ((config.loss_params.depth_map_equivariance_loss_coef > 38 | 0) == config.network_params.transformation_equivariance) or \ 39 | ((config.loss_params.pose_equivariance_loss_coef > 40 | 0) == config.network_params.transformation_equivariance) 41 | 42 | 43 | def yaml_config(config_path: str, default_config_path: str) -> edict: 44 | """ 45 | 46 | Args: 47 | config_path: 48 | default_config_path: 49 | 50 | Returns: 51 | 52 | """ 53 | default_config = edict(yaml.load(open(default_config_path), Loader=yaml.SafeLoader)) 54 | current_config = edict(yaml.load(open(config_path), Loader=yaml.SafeLoader)) 55 | 56 | def _copy(conf: dict, default_conf: dict): 57 | for key in conf: 58 | if isinstance(default_conf[key], edict): 59 | _copy(conf[key], default_conf[key]) 60 | else: 61 | default_conf[key] = conf[key] 62 | 63 | _copy(current_config, default_config) 64 | 65 | # copy params 66 | default_config.network_params.size = default_config.dataset.size 67 | default_config.network_params.num_parts = default_config.dataset.num_parts 68 | 69 | if "video_len" in default_config.dataset: 70 | default_config.network_params.video_len = default_config.dataset.video_len 71 | 72 | if "transformation_equivariance" in default_config.network_params: 73 | default_config.dataset.transformation_equivariance = default_config.network_params.transformation_equivariance 74 | default_config.test_dataset.transformation_equivariance = False 75 | 76 | if "decoder_params" in default_config.network_params: 77 | default_config.network_params.decoder_params.num_parts = default_config.dataset.num_parts 78 | default_config.network_params.decoder_params.num_camera = default_config.dataset.num_view 79 | 80 | if "multiview" in default_config.dataset: 81 | default_config.network_params.multiview = default_config.dataset.multiview 82 | 83 | if "num_frames" in default_config.dataset: 84 | default_config.network_params.video_length = default_config.dataset.num_frames 85 | default_config.network_params.num_view = default_config.dataset.num_view 86 | 87 | return_neighboring_frames = False 88 | 89 | default_config.dataset.return_neighboring_frames = return_neighboring_frames 90 | default_config.test_dataset.return_neighboring_frames = return_neighboring_frames 91 | 92 | return_random_frames = False 93 | 94 | default_config.dataset.return_random_frames = return_random_frames 95 | default_config.test_dataset.return_random_frames = return_random_frames 96 | 97 | default_config.network_params.background_color = default_config.dataset.background_color 98 | 99 | check_config(default_config) 100 | 101 
| return default_config 102 | -------------------------------------------------------------------------------- /src/utils/get_args.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | import argparse 13 | from typing import Optional, Any, Tuple 14 | 15 | from easydict import EasyDict as edict 16 | 17 | from .config import yaml_config 18 | 19 | 20 | def get_config(args: Any) -> edict: 21 | """ 22 | 23 | Args: 24 | args: 25 | 26 | Returns: 27 | 28 | """ 29 | config = yaml_config(args.config, args.default_config) 30 | config.resume_latest = args.resume_latest 31 | if config.resume_model_path is None: 32 | config.resume_model_path = args.resume_model_path 33 | 34 | return config 35 | 36 | 37 | def get_args(config_path: Optional[str] = None) -> Tuple[Any, edict]: 38 | """ 39 | 40 | Args: 41 | config_path: 42 | 43 | Returns: 44 | 45 | """ 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument('--config', type=str, default="confs/default.yml") 48 | parser.add_argument('--default_config', type=str, default="confs/default.yml") 49 | parser.add_argument('--resume_latest', action='store_true') 50 | parser.add_argument('--resume_model_path', type=str, default=None) 51 | 52 | args = parser.parse_args() 53 | if config_path is not None: 54 | args.config = config_path 55 | 56 | config = get_config(args) 57 | 58 | return args, config 59 | 60 | 61 | def get_ddp_args(config_path: Optional[str] = None) -> Tuple[Any, edict]: 62 | """ 63 | 64 | Args: 65 | config_path: 66 | 67 | Returns: 68 | 69 | """ 70 | parser = argparse.ArgumentParser() 71 | parser.add_argument('--config', type=str, default="confs/default.yml") 72 | parser.add_argument('--default_config', type=str, default="confs/default.yml") 73 | parser.add_argument('--resume_latest', action='store_true') 74 | parser.add_argument('--resume_model_path', type=str, default=None) 75 | parser.add_argument('--gpus', type=int, default=1) 76 | parser.add_argument('--nodes', type=int, default=1) 77 | 78 | args = parser.parse_args() 79 | if config_path is not None: 80 | args.config = config_path 81 | 82 | config = get_config(args) 83 | 84 | return args, config 85 | 86 | 87 | def get_args_jupyter(config_path: str = "confs/default.yml", default_config: str = "confs/default.yml" 88 | ) -> Tuple[None, edict]: 89 | """ 90 | 91 | Args: 92 | config_path: 93 | default_config: 94 | 95 | Returns: 96 | 97 | """ 98 | config = yaml_config(config_path, default_config) 99 | 100 | return None, config 101 | -------------------------------------------------------------------------------- /src/utils/graph_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | 13 | from typing import Tuple, List 14 | from copy import deepcopy 15 | import numpy as np 16 | import queue 17 | 18 | 19 | def get_parent_and_children_id(num_parts: int, joint_connection: np.ndarray, selected_candidate_id: np.ndarray, 20 | root_id: int) -> Tuple[np.ndarray, List[List], List[List]]: 21 | """ 22 | Get parent and children id of each part 23 | Args: 24 | num_parts: 25 | joint_connection: 26 | selected_candidate_id: 27 | root_id: 28 | 29 | Returns: 30 | 31 | """ 32 | parent_id = -np.ones(num_parts, dtype="int64") # initialize parent id with -1 33 | children_id = [[_] for _ in range(num_parts)] 34 | connected_to = [[] for _ in range(num_parts)] 35 | for j1, j2 in joint_connection: 36 | connected_to[j1].append(j2) 37 | connected_to[j2].append(j1) 38 | 39 | que = queue.Queue() 40 | visited = [root_id] 41 | [que.put(ct) for ct in connected_to[root_id]] 42 | while True: 43 | current = que.get() 44 | visited.append(current) 45 | parent_id[current] = list(set(visited) & set(connected_to[current]))[0] 46 | if len(visited) == num_parts: 47 | break 48 | not_visited = list(set(connected_to[current]) - set(visited)) 49 | [que.put(ct) for ct in not_visited] 50 | 51 | for idx in reversed(visited): 52 | if parent_id[idx] >= 0: 53 | children_id[parent_id[idx]] += deepcopy(children_id[idx]) 54 | 55 | selected_candidate = [] 56 | for i in range(num_parts): 57 | parent = parent_id[i] 58 | if parent < 0: 59 | cand_i, cand_parent = -1, -1 60 | else: 61 | matched = (joint_connection == np.array([i, parent])).all(axis=1) 62 | 63 | if matched.any(): 64 | assert matched.sum() == 1 65 | cand_i, cand_parent = selected_candidate_id[matched][0] 66 | else: 67 | matched = (joint_connection == np.array([parent, i])).all(axis=1) 68 | assert matched.sum() == 1 69 | cand_parent, cand_i = selected_candidate_id[matched][0] 70 | 71 | selected_candidate.append([cand_i, cand_parent]) 72 | 73 | return parent_id, children_id, selected_candidate 74 | -------------------------------------------------------------------------------- /src/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | from typing import Optional, Tuple 13 | 14 | import numpy as np 15 | import torch 16 | import torch.nn.functional as F 17 | from easydict import EasyDict as edict 18 | from pytorch3d.transforms.rotation_conversions import rotation_6d_to_matrix 19 | from torch import nn 20 | 21 | 22 | def expand_mask(mask: torch.Tensor, coarse_rate: int = 32, stride: int = 32): 23 | """ 24 | Expand mask by max pooling 25 | Args: 26 | mask: 27 | coarse_rate: 28 | stride: 29 | 30 | Returns: 31 | 32 | """ 33 | pad = 0 if coarse_rate == stride else (coarse_rate - 1) // 2 34 | dilate_mask = F.max_pool2d(mask[:, None], coarse_rate, stride, pad) 35 | if stride > 1: 36 | dilate_mask = F.interpolate(dilate_mask, scale_factor=stride, mode="nearest") 37 | 38 | return dilate_mask 39 | 40 | 41 | def foreground_sampler(img_size: int, num_ray: int, mask: torch.Tensor, coarse_rate: int = 32, 42 | stride: int = 32, dim: int = 1, expand=True) -> torch.Tensor: 43 | """uniformly sample around foreground mask 44 | 45 | Args: 46 | img_size (int): image size 47 | num_ray (int): number of points to sample 48 | mask (int): shape: (B, img_size, img_size) 49 | coarse_rate 50 | stride 51 | dim 52 | expand 53 | 54 | Returns: 55 | torch.Tensor: sampled coordinates, shape: (B, 2, num_ray) if dim==1 56 | """ 57 | if expand: 58 | dilate_mask = expand_mask(mask, coarse_rate, stride).squeeze(1) > 0.5 59 | else: 60 | dilate_mask = mask > 0.5 61 | unreliable_mask = mask > 1 62 | dilate_mask = dilate_mask.float() - unreliable_mask * 2 63 | noised_dilate_mask = dilate_mask + torch.empty_like(dilate_mask, dtype=torch.float).uniform_() 64 | noised_dilate_mask = noised_dilate_mask.reshape(-1, img_size ** 2) 65 | _, coordinates = torch.topk(noised_dilate_mask, num_ray, dim=1, sorted=False) 66 | coordinates = torch.stack([coordinates % img_size, 67 | torch.div(coordinates, img_size, rounding_mode='trunc')], dim=dim) 68 | 69 | return coordinates 70 | 71 | 72 | def patch_sampler(img_size: int, num_ray: int, mask: torch.Tensor, coarse_rate: int = 32, 73 | dim: int = 1, expand=True) -> torch.Tensor: 74 | """sample patch 75 | 76 | Args: 77 | img_size (int): image size 78 | num_ray (int): number of points to sample 79 | mask (torch.Tensor): shape: (B, img_size, img_size) 80 | coarse_rate 81 | dim 82 | expand 83 | 84 | Returns: 85 | torch.Tensor: sampled coordinates, shape: (B, 2, num_ray) if dim==1 86 | """ 87 | assert (num_ray ** 0.5).is_integer() 88 | assert expand 89 | 90 | patch_size = int(num_ray ** 0.5) 91 | expansion_size = max(0, coarse_rate - patch_size // 2) 92 | 93 | dilate_mask = expand_mask(mask, expansion_size + 1, 1) 94 | 95 | noised_dilate_mask = (dilate_mask > 0.5) + torch.empty_like(dilate_mask, dtype=torch.float).uniform_() 96 | noised_dilate_mask = noised_dilate_mask.reshape(-1, img_size ** 2) 97 | patch_center = torch.argmax(noised_dilate_mask, dim=1, keepdim=True) 98 | 99 | device = mask.device 100 | coordinates = torch.stack([patch_center % img_size, 101 | torch.div(patch_center, img_size, rounding_mode='trunc')], dim=dim) 102 | coordinates = coordinates.clamp(patch_size // 2, img_size - patch_size // 2 - 1) 103 | 104 | grid = torch.meshgrid(torch.arange(-patch_size // 2, patch_size // 2, device=device), 105 | torch.arange(-patch_size // 2, patch_size // 2, device=device), indexing='ij') 106 | grid = torch.stack([grid[1].reshape(1, -1), grid[0].reshape(1, -1)], dim=dim) 107 | 108 | coordinates = coordinates + grid 109 | 110 | return coordinates 111 | 112 | 113 | class PixelSampler: 114 | def __init__(self, 
sample_strategy: str = "uniform"): 115 | """ 116 | 117 | Args: 118 | sample_strategy: 119 | """ 120 | self.sample_strategy = sample_strategy 121 | 122 | @staticmethod 123 | def unifrom_sampler(img_size: int, num_ray: int, batchsize: int) -> torch.Tensor: 124 | """uniformly sample pixel coordinates 125 | 126 | Args: 127 | img_size (int): image size 128 | num_ray (int): number of points to sample 129 | batchsize: 130 | 131 | Returns: 132 | torch.Tensor: sampled coordinates, shape: (B, 2, num_ray) 133 | """ 134 | coordinates = torch.randint(high=img_size, size=(batchsize, 2, num_ray), device="cuda") 135 | 136 | return coordinates 137 | 138 | def __call__(self, img_size: int, num_ray: int, batchsize: int, 139 | mask: Optional[torch.Tensor] = None, expand=True, 140 | coarse_rate: int = 32, stride: int = 32) -> torch.Tensor: 141 | """ 142 | 143 | Args: 144 | img_size: 145 | num_ray: 146 | batchsize: 147 | mask: 148 | expand: 149 | coarse_rate: 150 | stride: 151 | 152 | Returns: 153 | 154 | """ 155 | if self.sample_strategy == "uniform": 156 | return self.unifrom_sampler(img_size, num_ray, batchsize) 157 | elif self.sample_strategy == "foreground": 158 | return foreground_sampler(img_size, num_ray, mask, expand=expand, 159 | coarse_rate=coarse_rate, stride=stride) 160 | elif self.sample_strategy == "patch": 161 | return patch_sampler(img_size, num_ray, mask, expand=expand, 162 | coarse_rate=coarse_rate) 163 | else: 164 | raise ValueError() 165 | 166 | 167 | class PoseTrajectoryMLP(nn.Module): 168 | def __init__(self, video_len: int, n_keypoints: int, hidden_dim: int = 128, n_mlp: int = 4, k: int = 100, 169 | n_split: int = 1, **kwargs): 170 | """ 171 | 172 | Args: 173 | video_len: 174 | n_keypoints: 175 | hidden_dim: 176 | n_mlp: 177 | k: 178 | n_split: 179 | **kwargs: 180 | """ 181 | super(PoseTrajectoryMLP, self).__init__() 182 | self.video_len = video_len 183 | self.n_keypoints = n_keypoints 184 | self.k = k 185 | self.n_split = n_split 186 | if n_split > 1: 187 | split_loc = [-1 / n_split] + [(i + 1) / n_split for i in range(n_split - 1)] + [1 + 1 / n_split] 188 | self.split_loc = np.array(split_loc) 189 | 190 | layers = [nn.Conv1d(self.k, hidden_dim * n_split, 1), nn.ELU(inplace=True)] 191 | for i in range(n_mlp - 1): 192 | layers.append(nn.Conv1d(hidden_dim * n_split, hidden_dim * n_split, 1, groups=n_split)) 193 | layers.append(nn.ELU(inplace=True)) 194 | 195 | layers.append(nn.Conv1d(hidden_dim * n_split, n_keypoints * 9 * n_split, 1, groups=n_split)) 196 | else: 197 | layers = [nn.Linear(self.k, hidden_dim), nn.ELU(inplace=True)] 198 | for i in range(n_mlp - 1): 199 | layers.append(nn.Linear(hidden_dim, hidden_dim)) 200 | layers.append(nn.ELU(inplace=True)) 201 | 202 | layers.append(nn.Linear(hidden_dim, n_keypoints * 9)) 203 | 204 | self.model = nn.Sequential(*layers) 205 | 206 | def backbone(self, t: torch.Tensor) -> torch.Tensor: 207 | """ 208 | 209 | Args: 210 | t: 211 | 212 | Returns: 213 | 214 | """ 215 | batchsize = t.shape[0] 216 | device = t.device 217 | freq = (t[:, None] + 0.5 / self.video_len) * np.pi * torch.arange(0, self.k, device=device) # (B, k) 218 | if self.n_split > 1: 219 | freq = freq[:, :, None] # (B, k) 220 | 221 | trajectory = self.model(torch.cos(freq)) # (B, n_kpts * 9) 222 | 223 | if self.n_split > 1: 224 | trajectory = trajectory.reshape(batchsize, self.n_split, self.n_keypoints * 9) 225 | split_loc = torch.tensor(self.split_loc, device=device, dtype=torch.float) 226 | sigmoid_scale = 12 * self.n_split 227 | weight = torch.sigmoid((t[:, None] - 
split_loc[None, :-1]) * sigmoid_scale) * \ 228 | torch.sigmoid(-(t[:, None] - split_loc[None, 1:]) * sigmoid_scale) 229 | trajectory = torch.sum(trajectory * weight[:, :, None], dim=1) 230 | 231 | return trajectory 232 | 233 | def forward(self, idx: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 234 | """ 235 | Compute rotation and translation matrices from positionally encoded time 236 | Args: 237 | idx: frame index: (B, ) 238 | 239 | Returns: 240 | joint rotation: (B, n_kpts, 3, 3) 241 | joint_translation: (B, n_kpts, 3, 1) 242 | 243 | """ 244 | batchsize = idx.shape[0] 245 | 246 | t = idx / self.video_len 247 | 248 | trajectory = self.backbone(t) 249 | 250 | rot, trans = torch.split(trajectory, [6 * self.n_keypoints, 251 | 3 * self.n_keypoints], dim=1) 252 | rot = rotation_6d_to_matrix(rot.reshape(batchsize * self.n_keypoints, 6)) 253 | rot = rot.reshape(batchsize, self.n_keypoints, 3, 3) 254 | trans = trans.reshape(batchsize, self.n_keypoints, 3, 1) 255 | 256 | return rot, trans 257 | 258 | 259 | def get_pose_trajectory(config: edict) -> PoseTrajectoryMLP: 260 | """ 261 | 262 | Args: 263 | config: 264 | 265 | Returns: 266 | 267 | """ 268 | video_len = config.video_length 269 | num_parts = config.num_parts 270 | params = config.trajectory_params.dct 271 | 272 | return PoseTrajectoryMLP(video_len, num_parts, **params) 273 | -------------------------------------------------------------------------------- /src/utils/render_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | from typing import Tuple 13 | 14 | import numpy as np 15 | import torch 16 | import torch.nn.functional as F 17 | from torch import nn 18 | 19 | 20 | def _get_ray_direction(pixel_location: torch.Tensor, inv_intrinsics: torch.Tensor) -> torch.Tensor: 21 | """ 22 | 23 | Args: 24 | pixel_location: 25 | inv_intrinsics: 26 | 27 | Returns: 28 | 29 | """ 30 | batchsize, _, num_ray = pixel_location.shape 31 | # + 0.5 is required 32 | homogeneous = torch.cat( 33 | [pixel_location + 0.5, torch.ones(batchsize, 1, num_ray, device="cuda")], dim=1) 34 | 35 | ray_direction = torch.matmul(inv_intrinsics, 36 | homogeneous) # shape: (B, 3, num_ray), not unit vector 37 | 38 | return ray_direction 39 | 40 | 41 | def _coarse_sample(pixel_location: torch.Tensor, inv_intrinsics: torch.Tensor, 42 | joint_translation: torch.Tensor = None, near_plane: float = 1, 43 | far_plane: float = 5, num_coarse: int = 64 44 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 45 | """coarse sampling. uniform sampling in camera frustum 46 | 47 | Args: 48 | pixel_location (torch.Tensor): 2D location on image, 49 | shape: (B, 2, num_ray) 50 | inv_intrinsics (torch.Tensor): inverse of camera intrinsics 51 | shape: (B, 3, 3) 52 | joint_translation (torch.Tensor, optional): (B, num_parts, 3, 1) 53 | near_plane (float, optional): distance to the near sampling plane. Defaults to 1. 54 | far_plane (float, optional): distance to the far sampling plane. Defaults to 5.
55 | num_coarse (int, optional): number of sample in each ray 56 | 57 | Returns: 58 | coarse_location (torch.Tensor): shape: (B, 3, num_ray, num_coarse) 59 | coarse_depth (torch.Tensor): shape: (B, 1, num_ray, num_coarse) 60 | ray_direction (torch.Tensor): shape: (B, 3, num_ray) 61 | """ 62 | batchsize, _, num_ray = pixel_location.shape 63 | 64 | ray_direction = _get_ray_direction(pixel_location, inv_intrinsics) 65 | 66 | if joint_translation is None: 67 | uniform_depth = torch.linspace(near_plane, far_plane, num_coarse, device="cuda") 68 | coarse_depth = uniform_depth[None, None, None].repeat(batchsize, 1, num_ray, 1) 69 | else: 70 | max_depth = joint_translation[:, :, 2, 0].max(dim=1)[0] 71 | min_depth = joint_translation[:, :, 2, 0].min(dim=1)[0] 72 | 73 | far = max_depth + 0.5 74 | near = torch.clamp_min(min_depth - 0.5, near_plane) 75 | eps = torch.linspace(0, 1, num_coarse, device="cuda") 76 | uniform_depth = near[:, None] * (1 - eps) + far[:, None] * eps 77 | uniform_depth = uniform_depth[:, None, None] 78 | coarse_depth = uniform_depth.repeat(1, 1, num_ray, 1) 79 | 80 | coarse_location = ray_direction[:, :, :, None] * uniform_depth 81 | ray_direction = F.normalize(ray_direction, dim=1) 82 | 83 | return coarse_location, coarse_depth, ray_direction, 84 | 85 | 86 | def _weight_for_volume_rendering(density: torch.Tensor, depth: torch.Tensor) -> torch.Tensor: 87 | """weight for volume rendering 88 | 89 | Args: 90 | density (torch.Tensor): [description] 91 | depth (torch.Tensor): [description] 92 | 93 | Returns: 94 | torch.Tensor: weight for each coarse bin, shape: (B, 1, 1, num_ray, num - 1) 95 | """ 96 | assert density.ndim == 4 97 | assert depth.ndim == 4 98 | sigmoid = torch.sigmoid(density) # shape: (B, 1, num_ray, num_on_ray) 99 | alpha = torch.clamp_min((sigmoid[..., :-1] - sigmoid[..., 1:]) / (sigmoid[..., :-1] + 1e-10), 0) 100 | _alpha = torch.cat([torch.zeros_like(alpha[..., :1]), alpha], dim=-1) 101 | alpha_ = torch.cat([alpha, torch.zeros_like(alpha[..., :1])], dim=-1) 102 | T_i = torch.cumprod(1 - _alpha, dim=-1) 103 | weights = T_i * alpha_ 104 | 105 | return weights 106 | 107 | 108 | def _multinomial_sample(weights: torch.Tensor, num_fine: int) -> torch.Tensor: 109 | """multinomial sample for fine sampling 110 | 111 | Args: 112 | weights (torch.Tensor): [description] 113 | num_fine (int): [description] 114 | 115 | Returns: 116 | torch.Tensor: normalized sampled position 117 | """ 118 | batchsize, _, num_ray, num_coarse = weights.shape 119 | weights = weights.reshape(batchsize * num_ray, num_coarse) 120 | sampled_bins = torch.multinomial(torch.clamp_min(weights, 1e-8), num_fine, 121 | replacement=True).reshape(batchsize, 1, 1, num_ray, num_fine) / (num_coarse - 1) 122 | offset_in_bins = torch.cuda.FloatTensor( 123 | batchsize, 1, 1, num_ray, num_fine).uniform_() / (num_coarse - 1) 124 | sampled_normalized_depth = sampled_bins + offset_in_bins 125 | 126 | return sampled_normalized_depth 127 | 128 | 129 | def _get_fine_location(coarse_location: torch.Tensor, coarse_depth: torch.Tensor, 130 | sampled_normalized_depth: torch.Tensor 131 | ) -> Tuple[torch.Tensor, torch.Tensor]: 132 | """ 133 | 134 | Args: 135 | coarse_location: 136 | coarse_depth: 137 | sampled_normalized_depth: 138 | 139 | Returns: 140 | 141 | """ 142 | near_location = coarse_location[:, :, :, :1] 143 | far_location = coarse_location[:, :, :, -1:] 144 | fine_location = (near_location * (1 - sampled_normalized_depth) + 145 | far_location * sampled_normalized_depth) 146 | 147 | near_depth = coarse_depth[:, :, :, 
:1] 148 | far_depth = coarse_depth[:, :, :, -1:] 149 | fine_depth = (near_depth * (1 - sampled_normalized_depth) + 150 | far_depth * sampled_normalized_depth) 151 | 152 | fine_location = torch.cat([coarse_location, fine_location], dim=3) 153 | fine_depth = torch.cat([coarse_depth, fine_depth], dim=3) 154 | 155 | return fine_location, fine_depth 156 | 157 | 158 | def fine_sample(implicit_model: nn.Module, joint_rotation, 159 | joint_translation: torch.Tensor, pixel_location: torch.Tensor, 160 | inv_intrinsics: torch.Tensor, near_plane: float = 1, far_plane: float = 4, 161 | num_coarse: int = 64, num_fine: int = 64 162 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 163 | """fine sampling for nerf 164 | 165 | Args: 166 | implicit_model (nn.Module): implicit decoder 167 | joint_rotation (torch.Tensor): rotation matrix for each part 168 | joint_translation (torch.Tensor): translation for each part 169 | pixel_location (torch.Tensor): location of pixels 170 | inv_intrinsics (torch.Tensor): inverse of intrinsic matrix 171 | near_plane (float, optional): Defaults to 1. 172 | far_plane (float, optional): Defaults to 4. 173 | num_coarse (int, optional): number of sampling points for coarse sampling. Defaults to 64. 174 | num_fine (int, optional): number of sampling points for fine sampling. Defaults to 64. 175 | 176 | Returns: 177 | Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: fine locations, fine depths, and ray directions (shapes noted at the return statement) 178 | """ 179 | (coarse_location, coarse_depth, ray_direction) = _coarse_sample(pixel_location, inv_intrinsics, joint_translation, 180 | near_plane, far_plane, num_coarse) 181 | 182 | _, _, _, num_coarse = coarse_location.shape 183 | with torch.no_grad(): 184 | decoder_output = implicit_model(coarse_location, joint_rotation, joint_translation, 185 | coarse_sample=True) 186 | 187 | coarse_density = decoder_output["density"] 188 | sdf_scale = decoder_output.get("sdf_scale") 189 | 190 | if sdf_scale is not None: 191 | coarse_density = coarse_density * sdf_scale 192 | 193 | weights = _weight_for_volume_rendering(coarse_density, coarse_depth) 194 | 195 | # normalised fine points, shape: (B, 1, num_ray, num_fine) 196 | sampled_normalized_depth = _multinomial_sample(weights, num_fine)[:, 0] 197 | 198 | # fine points, shape: (B, 3, num_ray, num_fine) 199 | fine_location, fine_depth = _get_fine_location(coarse_location, coarse_depth, sampled_normalized_depth) 200 | 201 | sort_idx = torch.argsort(fine_depth, dim=3) 202 | fine_location = torch.gather(fine_location, dim=3, index=sort_idx.repeat(1, 3, 1, 1)) 203 | fine_depth = torch.gather(fine_depth, dim=3, index=sort_idx) 204 | 205 | return (fine_location, # (batchsize, 3, num_ray, (num_coarse + num_fine)) 206 | fine_depth, # (batchsize, 1, num_ray, (num_coarse + num_fine)) 207 | ray_direction) # (batchsize, 3, num_ray) 208 | 209 | 210 | def volume_rendering(density: torch.Tensor, color: torch.Tensor, depth: torch.Tensor 211 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: 212 | """volume rendering 213 | 214 | Args: 215 | density (torch.Tensor): shape: (B, 1, num_ray, num_coarse + num_fine) 216 | color (torch.Tensor): shape: (B, 3, num_ray, num_coarse + num_fine) 217 | depth (torch.Tensor): shape: (B, 1, num_ray, num_coarse + num_fine) 218 | 219 | Returns: 220 | rendered_color (torch.Tensor): shape: (B, 3, num_ray) 221 | rendered_mask (torch.Tensor): shape: (B, num_ray) 222 | rendered_disparity (torch.Tensor): shape: (B, num_ray) 223 | weights 224 | """ 225 | weights = _weight_for_volume_rendering(density, depth) 226 | 227 | rendered_color = torch.sum(weights * color, dim=3)
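# `weights` holds the per-sample contributions T_i * alpha_i along each ray, here of shape
# (B, 1, num_ray, num_coarse + num_fine), so the sum over the sample axis below serves as the
# accumulated opacity (the rendered mask) and the 1 / depth weighting gives the expected disparity.
# A minimal shape sketch, assuming a hypothetical batch of 2 images, 8 rays and 16 samples per ray:
#   density = torch.randn(2, 1, 8, 16); color = torch.rand(2, 3, 8, 16)
#   depth = torch.linspace(1, 4, 16).view(1, 1, 1, 16).expand(2, 1, 8, 16)
#   rgb, mask, disp, w = volume_rendering(density, color, depth)
#   # rgb: (2, 3, 8), mask: (2, 8), disp: (2, 8), w: (2, 1, 8, 16)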
228 | rendered_mask = torch.sum(weights, dim=3).squeeze(1) 229 | rendered_disparity = torch.sum(weights / depth, dim=3).squeeze(1) 230 | 231 | return (rendered_color, # (B, 3, num_ray) 232 | rendered_mask, # (B, num_ray) 233 | rendered_disparity, # (B, num_ray) 234 | weights) # (B, 1, 1, num_ray, num_points) 235 | 236 | 237 | def gather_pixel(img: torch.Tensor, pixel_location: torch.Tensor) -> torch.Tensor: 238 | """ 239 | 240 | Args: 241 | img: 242 | pixel_location: 243 | 244 | Returns: 245 | 246 | """ 247 | single_channel = (img.ndim == 3) 248 | if single_channel: 249 | img = img[:, None] 250 | 251 | batchsize, ch, height, width = img.shape 252 | 253 | if pixel_location.dtype == torch.int64: # pixel index 254 | img = img.reshape(batchsize, ch, height * width) 255 | # gather pixel values from pixel_location 256 | x_coord = pixel_location[:, 0] 257 | y_coord = pixel_location[:, 1] 258 | flattened_location = y_coord * width + x_coord # (B, num_ray) 259 | gathered_img = torch.gather(img, dim=2, index=flattened_location[:, None].repeat(1, ch, 1)) 260 | elif pixel_location.dtype == torch.float32: # in pixel index space (top-left = (0, 0)) 261 | _pixel_location = pixel_location.permute(0, 2, 1)[:, :, None] + 0.5 # (B, n_rays, 1, 2) 262 | _pixel_location = _pixel_location / (height / 2) - 1 263 | gathered_img = F.grid_sample(img, _pixel_location, mode='bicubic') # (B, ch, n_rays, 1) 264 | gathered_img = gathered_img.squeeze(3) 265 | else: 266 | raise TypeError("Invalid type for pixel_location") 267 | if single_channel: 268 | gathered_img = gathered_img[:, 0] 269 | 270 | return gathered_img 271 | 272 | 273 | def rotation_matrix(theta: float, axis: str = "y") -> torch.Tensor: 274 | """ 275 | 276 | Args: 277 | theta: 278 | axis: 279 | 280 | Returns: 281 | R: rotation matrix 282 | """ 283 | c = np.cos(theta) 284 | s = np.sin(theta) 285 | if axis == "y": 286 | R = torch.tensor(np.array([[c, 0, -s, 0], 287 | [0, 1, 0, 0], 288 | [s, 0, c, 0], 289 | [0, 0, 0, 1]])).float().cuda() 290 | elif axis == "z": 291 | R = torch.tensor(np.array([[c, -s, 0, 0], 292 | [s, c, 0, 0], 293 | [0, 0, 1, 0], 294 | [0, 0, 0, 1]])).float().cuda() 295 | else: 296 | raise ValueError("invalid axis") 297 | 298 | return R 299 | 300 | 301 | def rotate_pose(pose_camera: torch.Tensor, R: torch.Tensor) -> torch.Tensor: 302 | """ 303 | 304 | Args: 305 | pose_camera: 306 | R: 307 | 308 | Returns: 309 | 310 | """ 311 | center = torch.zeros(4, 4, device=R.device, dtype=torch.float) 312 | center[:3, 3] = pose_camera[0, :, :3, 3].mean(dim=0) 313 | center[3, 3] = 1 314 | rotated_pose = torch.matmul(R, (pose_camera - center)) + center 315 | 316 | return rotated_pose 317 | -------------------------------------------------------------------------------- /src/utils/sdf_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | from typing import Tuple 13 | 14 | import torch 15 | 16 | 17 | def f(radius: torch.Tensor, x: torch.Tensor, lam: torch.Tensor) -> torch.Tensor: 18 | """func to optimize 19 | 20 | Args: 21 | radius: radius of ellipsoids, (..., n_part, 3, 1) 22 | x: position, (..., n_part, 3, n_pts) 23 | lam: Lagrange multiplier, (..., n_part, n_pts) 24 | 25 | Returns: 26 | 27 | """ 28 | lam = lam.unsqueeze(-2) 29 | h = radius.square() * x.square() / torch.clamp_min((radius.square() + lam).square(), 1e-15) 30 | h = torch.sum(h, dim=-2) 31 | 32 | return h # (..., n_part, n_pts) 33 | 34 | 35 | def d_f(radius: torch.Tensor, x: torch.Tensor, lam: torch.Tensor) -> torch.Tensor: 36 | """derivative of f 37 | 38 | Args: 39 | radius: radius of ellipsoids, (..., n_part, 3, 1) 40 | x: position, (..., n_part, 3, n_pts) 41 | lam: Lagrange multiplier, (..., n_part, n_pts) 42 | 43 | Returns: 44 | 45 | """ 46 | lam = lam.unsqueeze(-2) 47 | eps = (((radius.square() + lam) > 0) * 2 - 1) * 1e-20 48 | h = radius.square() * x.square() / ((radius.square() + lam) ** 3 + eps) 49 | h = -2 * torch.sum(h, dim=-2) 50 | 51 | return h # (..., n_part, n_pts) 52 | 53 | 54 | def newton_step(radius: torch.Tensor, x: torch.Tensor, lam: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 55 | """ 56 | 57 | Args: 58 | radius: radius of ellipsoids, (..., n_part, 3, 1) 59 | x: position, (..., n_part, 3, n_pts) 60 | lam: Lagrange multiplier, (..., n_part, n_pts) 61 | 62 | Returns: 63 | 64 | """ 65 | with torch.no_grad(): 66 | diff = 1 - f(radius, x, lam) 67 | df = d_f(radius, x, lam) 68 | eps = ((df > 0) * 2 - 1) * 1e-15 69 | update = diff / (df + eps) 70 | 71 | return lam + update, diff 72 | 73 | 74 | def search_lam(radius: torch.Tensor, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 75 | """ 76 | 77 | Args: 78 | radius: radius of ellipsoids, (..., n_part, 3) 79 | x: position, (..., n_part, 3, n_pts) 80 | 81 | Returns: 82 | 83 | """ 84 | radius = radius.unsqueeze(-1) 85 | lam = torch.max(radius * x.abs() - radius.square(), dim=-2)[0] # (..., n_part, n_pts) 86 | diff = torch.tensor(0) 87 | for _ in range(10): 88 | lam, diff = newton_step(radius, x, lam) 89 | 90 | valid = torch.lt(diff, 1e-5) 91 | 92 | valid_lam = torch.lt(-torch.square(radius.min(dim=-2)[0]), lam) 93 | valid = torch.logical_and(valid, valid_lam) 94 | 95 | return lam, valid 96 | 97 | 98 | def lam_to_sdf(radius: torch.Tensor, x: torch.Tensor, lam: torch.Tensor) -> torch.Tensor: 99 | """ 100 | 101 | Args: 102 | radius: radius of ellipsoids, (..., n_part, 3) 103 | x: position, (..., n_part, 3, n_pts) 104 | lam: Lagrange multiplier, (..., n_part, n_pts) 105 | 106 | Returns: 107 | 108 | """ 109 | with torch.no_grad(): 110 | radius = radius.unsqueeze(-1) 111 | lam = lam.unsqueeze(-2) 112 | foot_on_sphere = radius / (radius.square() + lam) * x 113 | 114 | # differentiable from here! 
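# As written, `lam` (found by the Newton iterations above), the intermediate foot point, and the
# sign computed below are all obtained under torch.no_grad() and act as constants; the distance
# between the rescaled foot point and the query `x` is an ordinary autograd op, so the returned
# signed distance can still pass gradients back to `x` through that term.
# A minimal usage sketch of the full pipeline through `ellipsoid_sdf` below, with hypothetical shapes:
#   radius = torch.rand(4, 3) + 0.1   # 4 ellipsoid parts, positive radii
#   x = torch.randn(4, 3, 100)        # 100 query points per part
#   sdf = ellipsoid_sdf(radius, x)    # -> (4, 100), negative inside each ellipsoid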
115 | foot_on_ellipsoid = foot_on_sphere * radius # (..., n_part, 3, n_pts) 116 | with torch.no_grad(): 117 | sign = torch.sign(torch.sum(x.square() / radius.square(), dim=-2) - 1) 118 | sdf = torch.norm(foot_on_ellipsoid - x, dim=-2) * sign # (..., n_part, n_pts) 119 | 120 | return sdf 121 | 122 | 123 | @torch.jit.script 124 | def ellipsoid_sdf(radius: torch.Tensor, x: torch.Tensor) -> torch.Tensor: 125 | """ 126 | 127 | Args: 128 | radius: radius of ellipsoids, (..., n_part, 3) 129 | x: position, (..., n_part, 3, n_pts) 130 | 131 | Returns: 132 | 133 | """ 134 | lam, valid = search_lam(radius, x) 135 | sdf = lam_to_sdf(radius, x, lam) 136 | min_sdf = -radius.min(dim=-1)[0].unsqueeze(-1) 137 | sdf = torch.where(valid, sdf, min_sdf) 138 | sdf = torch.where(sdf < min_sdf, min_sdf, sdf) 139 | 140 | return sdf 141 | -------------------------------------------------------------------------------- /src/utils/train_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | import os 13 | from typing import Any, Tuple 14 | 15 | import torch 16 | import torch.optim as optim 17 | from easydict import EasyDict as edict 18 | from torch import nn 19 | from torch.utils.data import DataLoader, Dataset 20 | 21 | from datasets.dataset import SingleVideoDataset as HumanVideoDataset 22 | 23 | 24 | def create_dataloaders(config: dict, shuffle: bool = True) -> DataLoader: 25 | """create train and val dataloaders 26 | 27 | Args: 28 | config (dict): config.dataset 29 | shuffle 30 | 31 | Returns: 32 | data_loader (DataLoader): dataloader 33 | """ 34 | dataset = HumanVideoDataset(config) 35 | 36 | batchsize = config.batchsize 37 | num_workers = config.num_workers 38 | data_loader = DataLoader(dataset, batch_size=batchsize, num_workers=num_workers, 39 | shuffle=shuffle, drop_last=True, pin_memory=True) 40 | 41 | return data_loader 42 | 43 | 44 | def ddp_data_sampler(dataset: Dataset, rank: int, world_size: int, shuffle: bool, drop_last: bool 45 | ) -> torch.utils.data.distributed.DistributedSampler: 46 | """ 47 | 48 | Args: 49 | dataset: 50 | rank: 51 | world_size: 52 | shuffle: 53 | drop_last: 54 | 55 | Returns: 56 | 57 | """ 58 | dist_sampler = torch.utils.data.distributed.DistributedSampler( 59 | dataset, rank=rank, num_replicas=world_size, shuffle=shuffle, 60 | drop_last=drop_last) 61 | 62 | return dist_sampler 63 | 64 | 65 | def create_ddp_dataloaders(config: dict, rank: int, world_size: int) -> DataLoader: 66 | """create train and val dataloaders for ddp 67 | 68 | Args: 69 | config (dict): config.dataset 70 | rank 71 | world_size 72 | 73 | Returns: 74 | train_loader (DataLoader): dataloader for train 75 | val_loader (DataLoader): dataloader for val 76 | """ 77 | 78 | dataset = HumanVideoDataset(config) 79 | 80 | batchsize = config.batchsize 81 | num_workers = config.num_workers 82 | ddp_sampler = ddp_data_sampler(dataset, rank, world_size, shuffle=True, drop_last=True) 83 | data_loader = 
DataLoader(dataset, batch_size=batchsize, num_workers=num_workers, 84 | sampler=ddp_sampler, pin_memory=True) 85 | 86 | return data_loader 87 | 88 | 89 | def to_gpu(minibatch: dict) -> dict: 90 | """send minibatch dict to gpu 91 | 92 | Args: 93 | minibatch (dict): [description] 94 | 95 | Returns: 96 | dict: [description] 97 | """ 98 | 99 | return {key: val.cuda(non_blocking=True) for key, val in minibatch.items()} 100 | 101 | 102 | def to_tensor(minibatch: dict) -> dict: 103 | """numpy to torch.tensor 104 | Args: 105 | minibatch (dict): [description] 106 | 107 | Returns: 108 | dict: [description] 109 | """ 110 | 111 | return {key: torch.tensor(val).cuda(non_blocking=True).float() for key, val in minibatch.items()} 112 | 113 | 114 | def cat_dim0(tensor: torch.Tensor) -> torch.Tensor: 115 | """ 116 | 117 | Args: 118 | tensor: 119 | 120 | Returns: 121 | 122 | """ 123 | shape = tensor.shape 124 | 125 | return tensor.reshape((shape[0] * shape[1],) + shape[2:]) 126 | 127 | 128 | def cat_dim0_dict(minibatch: dict) -> dict: 129 | """ 130 | 131 | Args: 132 | minibatch: 133 | 134 | Returns: 135 | 136 | """ 137 | out_dict = {} 138 | for key, val in minibatch.items(): 139 | shape = val.shape 140 | if len(shape) <= 2: 141 | reshaped = val.reshape(-1) 142 | else: 143 | reshaped = val.reshape((shape[0] * shape[1],) + shape[2:]) 144 | out_dict[key] = reshaped 145 | 146 | return out_dict 147 | 148 | 149 | def set_port(config: edict) -> None: 150 | """ 151 | 152 | Args: 153 | config: 154 | 155 | Returns: 156 | 157 | """ 158 | master_addr = config.master_addr 159 | master_port = config.master_port 160 | os.environ['MASTER_ADDR'] = master_addr 161 | os.environ['MASTER_PORT'] = master_port 162 | 163 | 164 | def all_reduce_scalar(scalar: float) -> float: 165 | """ 166 | 167 | Args: 168 | scalar: 169 | 170 | Returns: 171 | 172 | """ 173 | scalar = torch.tensor(scalar).cuda(non_blocking=True) 174 | torch.distributed.all_reduce(scalar) 175 | 176 | return scalar.item() 177 | 178 | 179 | def all_reduce_dict(dictionary: dict, world_size: int) -> dict: 180 | """ 181 | 182 | Args: 183 | dictionary: 184 | world_size: 185 | 186 | Returns: 187 | 188 | """ 189 | reduced_dict = {} 190 | for key, val in dictionary.items(): 191 | reduced_dict[key] = all_reduce_scalar(val) / world_size 192 | 193 | return reduced_dict 194 | 195 | 196 | def grid_coordinates(size: int, device: str, scale: int = 2) -> torch.Tensor: 197 | """ 198 | 199 | Args: 200 | size: 201 | device: 202 | scale: 203 | 204 | Returns: 205 | 206 | """ 207 | grid = torch.meshgrid(torch.arange(size, device=device), 208 | torch.arange(size, device=device), indexing='ij')[::-1] 209 | grid = torch.stack(grid, dim=-1) * scale + 0.5 210 | grid = grid.reshape(1, size ** 2, 2) 211 | 212 | return grid 213 | 214 | 215 | def check_nan(model: nn.Module) -> bool: 216 | """ 217 | 218 | Args: 219 | model: 220 | 221 | Returns: 222 | 223 | """ 224 | state_dict = model.state_dict() 225 | 226 | isnan = False 227 | for val in state_dict.values(): 228 | if val.isnan().any(): 229 | isnan = True 230 | break 231 | 232 | return isnan 233 | 234 | 235 | def load_snapshot(model: nn.Module, optimizer, path: str, load_optimizer: bool = True) -> int: 236 | """ 237 | 238 | Args: 239 | model: 240 | optimizer: 241 | path: 242 | load_optimizer: 243 | 244 | Returns: 245 | 246 | """ 247 | snapshot = torch.load(path, map_location=lambda storage, loc: storage) # avoid OOM 248 | 249 | name_in_model = [n for n, _ in model.named_parameters()] 250 | for name in list(snapshot["model"].keys()): 251 | if name 
not in name_in_model: 252 | snapshot["model"].pop(name) 253 | 254 | model.load_state_dict(snapshot["model"], strict=False) 255 | 256 | if load_optimizer: 257 | optimizer.load_state_dict(snapshot["optimizer"]) 258 | iter = snapshot["iteration"] 259 | del snapshot 260 | torch.cuda.empty_cache() # remove cache for resuming 261 | 262 | return iter 263 | 264 | 265 | def create_optimizer(config: dict, model: nn.Module) -> Any: 266 | """create optimizer 267 | 268 | Args: 269 | config (dict): config.tran_setting 270 | model (nn.Module): target model 271 | 272 | Returns: 273 | [type]: optimizer 274 | """ 275 | if config.optimizer == "Adam": 276 | lr = config.lr 277 | decay = config.decay 278 | optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-20, weight_decay=decay) 279 | elif config.optimizer == "AdamW": 280 | lr = config.lr 281 | decay = config.decay 282 | print("adamw", decay) 283 | optimizer = optim.AdamW(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-20, weight_decay=decay) 284 | else: 285 | raise ValueError() 286 | 287 | return optimizer 288 | 289 | 290 | def send_model_to_gpu(rank: int, model: nn.Module, ddp: bool) -> Tuple[nn.Module, nn.Module]: 291 | """ 292 | 293 | Args: 294 | rank: 295 | model: 296 | ddp: 297 | 298 | Returns: 299 | 300 | """ 301 | num_gpus = torch.cuda.device_count() 302 | n_gpu = rank % num_gpus 303 | 304 | torch.cuda.set_device(n_gpu) 305 | model.cuda(n_gpu) 306 | 307 | if ddp: 308 | print(n_gpu) 309 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) 310 | model = nn.parallel.DistributedDataParallel(model, device_ids=[n_gpu], find_unused_parameters=True) 311 | model_module = model.module 312 | else: 313 | model_module = model 314 | 315 | return model, model_module, 316 | 317 | 318 | def save_model(model_module: nn.Module, optimizer, save_dir: str, iteration: int, rank: int, 319 | snapshot_prefix: str = "snapshot") -> int: 320 | """ 321 | Save model. If nan is detected, load the latest snapshot 322 | Args: 323 | model_module: 324 | optimizer: 325 | save_dir: 326 | iteration: 327 | rank: 328 | snapshot_prefix: 329 | 330 | Returns: 331 | 332 | """ 333 | isnan = check_nan(model_module) 334 | 335 | if isnan: 336 | print("nan detected") 337 | model_path = os.path.join(save_dir, f"{snapshot_prefix}_latest.pth") 338 | assert os.path.exists(model_path), "model snapshot is not saved" 339 | 340 | iteration = load_snapshot(model_module, optimizer, model_path) 341 | else: 342 | if rank == 0: 343 | params_to_save = {"iteration": iteration, 344 | "model": model_module.state_dict(), 345 | "optimizer": optimizer.state_dict()} 346 | torch.save(params_to_save, os.path.join(save_dir, f"{snapshot_prefix}_latest.pth")) 347 | torch.save( 348 | params_to_save, os.path.join( 349 | save_dir, f"{snapshot_prefix}_{(iteration // 10000 + 1) * 10000}.pth")) 350 | 351 | return iteration 352 | -------------------------------------------------------------------------------- /src/utils/trainer.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. 
Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | 13 | import os 14 | 15 | import tensorboardX as tbx 16 | import torch 17 | import torch.distributed as dist 18 | from torch.cuda.amp import GradScaler 19 | 20 | from utils.train_utils import (all_reduce_dict, cat_dim0_dict, create_dataloaders, create_ddp_dataloaders, 21 | load_snapshot, to_gpu, save_model) 22 | from utils.train_utils import set_port 23 | 24 | 25 | class TrainerBase: 26 | def run(self, config: dict, rank: int = 0, world_size: int = 1) -> None: 27 | """ 28 | 29 | Args: 30 | config: 31 | rank: 32 | world_size: 33 | 34 | Returns: 35 | 36 | """ 37 | torch.backends.cudnn.benchmark = True 38 | ddp = False 39 | assert world_size == 1 40 | train_loader = create_dataloaders(config.dataset) 41 | 42 | self.train_loader = train_loader 43 | 44 | self.train_func(config, train_loader, None, rank=rank, ddp=ddp, world_size=world_size) 45 | 46 | def ddp_run(self, rank: int, config: dict, world_size: int = 1) -> None: 47 | """ 48 | 49 | Args: 50 | rank: 51 | config: 52 | world_size: 53 | 54 | Returns: 55 | 56 | """ 57 | assert world_size > 1 58 | torch.backends.cudnn.benchmark = True 59 | ddp = True 60 | 61 | set_port(config.train_setting) 62 | backend = config.train_setting.backend 63 | dist.init_process_group(backend=backend, init_method='env://', rank=rank, 64 | world_size=world_size) 65 | torch.manual_seed(0) 66 | 67 | train_loader = create_ddp_dataloaders(config.dataset, rank, world_size) 68 | 69 | self.train_loader = train_loader 70 | 71 | try: 72 | self.train_func(config, train_loader, None, rank=rank, ddp=ddp, world_size=world_size) 73 | except KeyboardInterrupt: 74 | print('interrupted') 75 | 76 | dist.destroy_process_group() 77 | 78 | def prepare_model_and_optimizer(self, *args, **kwargs): 79 | raise NotImplementedError("Please implement prepare_model_and_optimizer") 80 | 81 | def define_loss_func(self, *args, **kwargs): 82 | raise NotImplementedError("Please implement define_loss_func") 83 | 84 | def lossfunc(self, *args, **kwargs): 85 | raise NotImplementedError("Please implement lossfunc") 86 | 87 | def process_before_train_step(self, iteration: int): 88 | pass 89 | 90 | def train_func(self, config: dict, train_loader, val_loader=None, rank: int = 0, 91 | ddp: bool = False, world_size: int = 1) -> None: 92 | """ 93 | 94 | Args: 95 | config: 96 | train_loader: 97 | val_loader: 98 | rank: 99 | ddp: 100 | world_size: 101 | 102 | Returns: 103 | 104 | """ 105 | num_iter = config.train_setting.num_iter 106 | log_interval = config.train_setting.log_interval 107 | save_interval = config.train_setting.save_interval 108 | out_dir = config.output_dir 109 | exp_name = config.exp_name 110 | 111 | model, model_module, optimizer = self.prepare_model_and_optimizer(config, rank, ddp) 112 | self.model = model 113 | 114 | save_dir = os.path.join(out_dir, "result", exp_name) 115 | if rank == 0: 116 | writer = tbx.SummaryWriter(os.path.join(out_dir, "tensorboard", exp_name)) 117 | os.makedirs(save_dir, exist_ok=True) 118 | os.chmod(save_dir, 0o755) 119 | 120 | iteration = 0 121 | 122 | if config.resume_model_path or config.resume_latest: 123 | if config.resume_model_path is not None: 124 | model_path = config.resume_model_path 125 | else: 126 | model_path = os.path.join(save_dir, f"{self.snapshot_prefix}_latest.pth") 127 | 128 | iteration = load_snapshot(model_module, 
optimizer, model_path, load_optimizer=config.load_optimizer) 129 | if config.iteration is not None: 130 | iteration = config.iteration 131 | 132 | # define loss 133 | self.define_loss_func(config, model_module, ddp) 134 | 135 | self.process_before_train_step(iteration) 136 | while iteration < num_iter: 137 | for i, minibatch in enumerate(train_loader): 138 | self.process_before_train_step(iteration) 139 | 140 | iteration += 1 141 | model.train() 142 | minibatch = to_gpu(minibatch) 143 | 144 | if minibatch["img"].ndim == 5: 145 | # reshape (B, video_len, *) -> (B * video_len, *) 146 | minibatch = cat_dim0_dict(minibatch) 147 | 148 | optimizer.zero_grad(set_to_none=True) 149 | 150 | # loss calculation 151 | loss, loss_dict = self.lossfunc(config, minibatch, model, model_module) 152 | 153 | if config.fp16: 154 | scaler = GradScaler() 155 | scaler.scale(loss).backward() 156 | scaler.step(optimizer) 157 | scaler.update() 158 | else: 159 | 160 | # with torch.autograd.detect_anomaly(): 161 | loss.backward() 162 | 163 | # detect nan 164 | nan = any([p.grad.isnan().any() for p in model_module.parameters() if p.grad is not None]) 165 | if nan: 166 | print("NaN is detected!!!!") 167 | del loss 168 | torch.cuda.empty_cache() 169 | else: 170 | if config.train_setting.clip_grad: 171 | torch.nn.utils.clip_grad_norm_(model.parameters(), 172 | max_norm=2.0, norm_type=2) 173 | 174 | optimizer.step() 175 | 176 | if ddp: 177 | loss_dict = all_reduce_dict(loss_dict, world_size) 178 | 179 | if iteration % 10 == 0 and rank == 0: 180 | print(iteration, loss_dict) 181 | # tensorboard 182 | if iteration % log_interval == 0 and rank == 0: 183 | print("log") 184 | for key, val in loss_dict.items(): 185 | writer.add_scalar("metrics/" + key, val, iteration) 186 | 187 | if iteration % save_interval == 0: 188 | iteration = save_model(model_module, optimizer, save_dir, iteration, rank, self.snapshot_prefix) 189 | -------------------------------------------------------------------------------- /src/validation/SMPL_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | 13 | import argparse 14 | import os 15 | import sys 16 | from typing import Dict, Tuple 17 | 18 | import torch 19 | 20 | sys.path.append(".") 21 | 22 | from models.model import SingleVideoPartDecomposition 23 | from datasets.dataset import SingleVideoDataset as HumanVideoDataset 24 | from utils.get_args import get_args_jupyter 25 | from utils.train_utils import create_dataloaders 26 | 27 | 28 | def regress(model: SingleVideoPartDecomposition, config: Dict, dataset: HumanVideoDataset 29 | ) -> Tuple[torch.Tensor, torch.Tensor, float]: 30 | """ 31 | 32 | Args: 33 | model: 34 | config: 35 | dataset: 36 | 37 | Returns: 38 | 39 | """ 40 | with torch.no_grad(): 41 | frame_id = torch.arange(config.dataset.num_frames, dtype=torch.float, device="cuda") 42 | trajectory = model.joint_trajectory(frame_id) 43 | 44 | rotation, translation = trajectory 45 | video_len = rotation.shape[0] 46 | 47 | smpl_pose = torch.tensor(dataset.video_cache["smpl_pose"], device=rotation.device, 48 | dtype=torch.float) / dataset.coordinate_scale 49 | 50 | child_root, self_root = model.decoder.joint_root_locations(rotation, translation) 51 | child_root = child_root.permute(0, 1, 3, 2).reshape(video_len, -1, 3) 52 | estimated_keypoints = torch.cat([child_root, self_root.squeeze(-1)], dim=1) 53 | smpl_keypoints = smpl_pose[:, :, :3, 3] 54 | 55 | # train test split 56 | train_idx = range(0, video_len, 10) 57 | test_idx = [i for i in range(video_len) if i % 10 != 0] 58 | estimated_keypoints_train = estimated_keypoints[train_idx] 59 | estimated_keypoints_test = estimated_keypoints[test_idx] 60 | smpl_keypoints_train = smpl_keypoints[train_idx] 61 | smpl_keypoints_test = smpl_keypoints[test_idx] 62 | 63 | estimated_keypoints_train = estimated_keypoints_train.permute(0, 2, 1).reshape(len(train_idx) * 3, -1).cpu() 64 | estimated_keypoints_test = estimated_keypoints_test.permute(0, 2, 1).reshape(len(test_idx) * 3, -1).cpu() 65 | smpl_keypoints_train = smpl_keypoints_train.permute(0, 2, 1).reshape(len(train_idx) * 3, -1).cpu() 66 | smpl_keypoints_test = smpl_keypoints_test.permute(0, 2, 1).reshape(len(test_idx) * 3, -1).cpu() 67 | 68 | lstsq_result_train = torch.linalg.lstsq(estimated_keypoints_train, smpl_keypoints_train, driver="gelsd") 69 | j2s_mapping_train = lstsq_result_train.solution 70 | test_error = estimated_keypoints_test @ j2s_mapping_train - smpl_keypoints_test 71 | test_error = test_error.reshape(len(test_idx), 3, smpl_keypoints_test.shape[-1]) 72 | test_error = test_error.norm(dim=1).mean() 73 | 74 | return j2s_mapping_train, estimated_keypoints, test_error * dataset.coordinate_scale * 1000 # millimeter 75 | 76 | 77 | def smpl_regression(config_path: str, default_config: str) -> None: 78 | """ 79 | 80 | Args: 81 | config_path: 82 | default_config: 83 | 84 | Returns: 85 | 86 | """ 87 | args, config = get_args_jupyter(config_path, default_config) 88 | config.dataset.batchsize = 1 89 | 90 | train_dataset: HumanVideoDataset = create_dataloaders(config.dataset, shuffle=True).dataset 91 | 92 | out_dir = config.output_dir 93 | exp_name = config.exp_name 94 | 95 | # model 96 | model = SingleVideoPartDecomposition(config.network_params) 97 | model.cuda() 98 | 99 | save_dir = os.path.join(out_dir, "result", exp_name) 100 | model_path = os.path.join(save_dir, "snapshot_latest.pth") 101 | if os.path.exists(model_path): 102 | snapshot = torch.load(model_path) 103 | state_dict = snapshot["model"] 104 | model.load_state_dict(state_dict, strict=False) 105 | else: 106 | assert False, "model is not loaded" 107 | 108 
| _, _, mpjpe = regress(model, config, train_dataset) 109 | 110 | print(f"{exp_name}: MPJPE={mpjpe:.4f}mm") 111 | 112 | 113 | if __name__ == "__main__": 114 | parser = argparse.ArgumentParser(description='SMPL regression evaluation') 115 | parser.add_argument('--exp_name', action='append', required=True) 116 | args = parser.parse_args() 117 | default_config = "confs/default.yml" 118 | 119 | exp_names = args.exp_name 120 | for exp_name in exp_names: 121 | config_path = f"confs/{exp_name}.yml" 122 | smpl_regression(config_path, default_config) 123 | -------------------------------------------------------------------------------- /src/validation/lpips_ssim.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | import sys 12 | import argparse 13 | import os 14 | import pickle 15 | from typing import Tuple 16 | 17 | import lpips 18 | import numpy as np 19 | import torch 20 | from skimage.metrics import structural_similarity as ssim 21 | 22 | sys.path.append(".") 23 | from utils.get_args import get_args_jupyter 24 | 25 | loss_fn_vgg = lpips.LPIPS(net='vgg').cuda() 26 | 27 | 28 | def evaluate(path: str) -> Tuple[float, float]: 29 | """Compute the mean LPIPS and SSIM between ground-truth and generated images stored in a pickle file. 30 | 31 | Args: 32 | path: path to a reconstruction pickle containing "gt_img" and "gen_img" arrays 33 | 34 | Returns: 35 | mean LPIPS and mean SSIM over all images 36 | """ 37 | with open(path, "rb") as f: 38 | data = pickle.load(f) 39 | 40 | gt_img = np.clip(data["gt_img"], 0, 255) / 127.5 - 1 41 | gen_img = np.clip(data["gen_img"], 0, 255) / 127.5 - 1 42 | 43 | batchsize = 16 44 | lpips_vals = [] 45 | with torch.no_grad(): 46 | for i in range(0, gt_img.shape[0], batchsize): 47 | lpips_val = loss_fn_vgg(torch.tensor(gt_img[i:i + batchsize]).cuda(), 48 | torch.tensor(gen_img[i:i + batchsize]).cuda()).squeeze().cpu().numpy() 49 | lpips_vals.append(lpips_val) 50 | 51 | mean_lpips = np.concatenate(lpips_vals).mean() 52 | 53 | ssim_vals = [] 54 | for i in range(gt_img.shape[0]): 55 | gt = gt_img[i].transpose(1, 2, 0) 56 | gen = gen_img[i].transpose(1, 2, 0) 57 | ssim_vals.append(ssim(gt, gen, data_range=gt.max() - gt.min(), multichannel=True)) 58 | 59 | mean_ssim = np.array(ssim_vals).mean() 60 | 61 | return mean_lpips, mean_ssim 62 | 63 | 64 | def eval_all(exp_name: str) -> None: 65 | """Evaluate novel-view and novel-pose reconstructions of one experiment and save the LPIPS/SSIM results. 66 | 67 | Args: 68 | exp_name: experiment name, matching confs/<exp_name>.yml 69 | 70 | Returns: 71 | 72 | """ 73 | default_config = "confs/default.yml" 74 | config_path = f"confs/{exp_name}.yml" 75 | args, config = get_args_jupyter(config_path, default_config) 76 | out_dir = config.output_dir 77 | exp_name = config.exp_name 78 | root = os.path.join(out_dir, "result") 79 | result = {} 80 | validation_dir_name = f"{root}/{exp_name}/validation" 81 | mean_lpips, mean_ssim = evaluate(f"{validation_dir_name}/reconstruction_test.pkl") 82 | print(exp_name) 83 | print("NV", mean_lpips, mean_ssim) 84 | result["novel_view"] = {"lpips": mean_lpips, "ssim": mean_ssim} 85 | mean_lpips, mean_ssim = evaluate(f"{validation_dir_name}/reconstruction_novel_pose.pkl") 86 | print("NP", mean_lpips, mean_ssim) 87 | result["novel_pose"] = {"lpips": 
mean_lpips, "ssim": mean_ssim} 88 | 89 | with open(f"{validation_dir_name}/lpips_ssim.pkl", "wb") as f: 90 | pickle.dump(result, f) 91 | 92 | 93 | if __name__ == "__main__": 94 | parser = argparse.ArgumentParser(description='Compute lpips and ssim') 95 | parser.add_argument('--exp_name', action='append', required=True) 96 | args = parser.parse_args() 97 | 98 | exp_names = args.exp_name 99 | 100 | for exp_name in exp_names: 101 | eval_all(exp_name) 102 | -------------------------------------------------------------------------------- /src/validation/reconstruction.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | import argparse 13 | import os 14 | import pickle 15 | import sys 16 | from typing import Tuple, Any, Optional 17 | 18 | import numpy as np 19 | import torch 20 | from easydict import EasyDict as edict 21 | from torch import nn 22 | from torch.utils.data import Dataset 23 | from tqdm import tqdm 24 | 25 | sys.path.append(".") 26 | from models.model import SingleVideoPartDecomposition 27 | from utils.get_args import get_args_jupyter 28 | from utils.train_utils import to_gpu, create_dataloaders 29 | 30 | 31 | def render(model: nn.Module, test_dataset: Dataset, data_idx: int, bg_color: np.ndarray, 32 | part_pose: Optional[Tuple[Any, Any]] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: 33 | """ 34 | 35 | Args: 36 | model: 37 | test_dataset: 38 | data_idx: 39 | bg_color: 40 | part_pose: 41 | 42 | Returns: 43 | 44 | """ 45 | minibatch = test_dataset[data_idx] 46 | minibatch = {k: torch.tensor(v) for k, v in minibatch.items()} 47 | minibatch = to_gpu(minibatch) 48 | img = minibatch["img"] 49 | gt_mask = minibatch["mask"] 50 | camera_rotation = minibatch["camera_rotation"][None] 51 | camera_translation = minibatch["camera_translation"][None] 52 | inv_intrinsics = torch.inverse(minibatch["camera_intrinsic"])[None] 53 | frame_id = minibatch["frame_id"][None] 54 | 55 | model.eval() 56 | 57 | with torch.no_grad(): 58 | if part_pose is not None: 59 | _part_pose = (part_pose[0][frame_id], part_pose[1][frame_id]) 60 | else: 61 | _part_pose = None 62 | 63 | rendered_dict = model.render_entire_img(frame_id, camera_rotation, camera_translation, 64 | inv_intrinsics, segmentation_label=False, 65 | ray_batchsize=16384, 66 | rotate_angle=0, manipulate_pose_config=None, 67 | part_pose=_part_pose) 68 | color = rendered_dict["rendered_colors"] 69 | mask = rendered_dict["rendered_masks"] 70 | 71 | color = color + (1 - mask) * bg_color 72 | 73 | img = img.cpu().numpy() * 127.5 + 127.5 74 | gt_mask = gt_mask.cpu().numpy() 75 | color = color.cpu().numpy() * 127.5 + 127.5 76 | mask = mask.cpu().numpy() 77 | 78 | return img, gt_mask, color, mask 79 | 80 | 81 | def test(config_path: str, default_config: str, mode: str = "test") -> None: 82 | """ 83 | 84 | Args: 85 | config_path: 86 | default_config: 87 | mode: 88 | 89 | Returns: 90 | 91 | """ 92 | assert mode in ["test", 
"novel_pose"] 93 | 94 | args, config = get_args_jupyter(config_path, default_config) 95 | config.dataset.batchsize = 1 96 | 97 | config.dataset.set_name = mode # test and novel_pose 98 | 99 | test_dataset = create_dataloaders(config.dataset, shuffle=True).dataset 100 | num_train_data = test_dataset.num_frames 101 | 102 | test_dataset.n_repetition_in_epoch = 1 103 | test_dataset.color_augmentation = False 104 | test_dataset.camera_dir_augmentation = False 105 | test_dataset.thin_out_interval = 1 106 | 107 | num_test_data = len(test_dataset.video_cache["img"]) 108 | test_dataset.current_max_frame_id = 100000000 109 | test_dataset.num_frames = 20 110 | num_test_view = num_test_data // test_dataset.num_frames 111 | num_camera_to_use = 5 112 | camera_for_test = np.linspace(0, num_test_view, num_camera_to_use, endpoint=False, dtype="int") 113 | 114 | out_dir = config.output_dir 115 | exp_name = config.exp_name 116 | save_dir = os.path.join(out_dir, "result", exp_name) 117 | 118 | # model 119 | model = SingleVideoPartDecomposition(config.network_params) 120 | model.cuda() 121 | 122 | model_path = os.path.join(save_dir, "snapshot_latest.pth") 123 | if os.path.exists(model_path): 124 | snapshot = torch.load(model_path) 125 | state_dict = snapshot["model"] 126 | model.load_state_dict(state_dict, strict=False) 127 | else: 128 | raise FileNotFoundError() 129 | 130 | if mode == "novel_pose": 131 | part_pose = regress_learned_from_smpl(num_train_data, model, test_dataset, config, use_smpl_verts=True) 132 | else: 133 | part_pose = None 134 | 135 | background_color = config.dataset.background_color 136 | 137 | gt_imgs, gt_masks, gen_imgs, gen_masks = [], [], [], [] 138 | for fra_idx in tqdm(range(test_dataset.num_frames)): 139 | for cam_idx in camera_for_test: 140 | data_idx = fra_idx + cam_idx * test_dataset.num_frames 141 | gt_img, gt_mask, gen_img, gen_mask = render(model, test_dataset, data_idx, background_color, part_pose) 142 | gt_imgs.append(gt_img) 143 | gt_masks.append(gt_mask) 144 | gen_imgs.append(gen_img) 145 | gen_masks.append(gen_mask) 146 | 147 | gt_imgs = np.array(gt_imgs) 148 | gt_masks = np.array(gt_masks) 149 | gen_imgs = np.array(gen_imgs) 150 | gen_masks = np.array(gen_masks) 151 | 152 | save_dict = {"gt_img": gt_imgs, "gt_mask": gt_masks, "gen_img": gen_imgs, "gen_mask": gen_masks} 153 | 154 | os.makedirs(f"{save_dir}/validation", exist_ok=True) 155 | with open(f"{save_dir}/validation/reconstruction_{mode}.pkl", "wb") as f: 156 | pickle.dump(save_dict, f) 157 | 158 | 159 | def regress_learned_from_smpl(num_train_data: int, model: nn.Module, test_dataset: Dataset, config: edict, 160 | use_smpl_verts: bool = True) -> Tuple[torch.Tensor, torch.Tensor]: 161 | """ 162 | 163 | Args: 164 | num_train_data: 165 | model: 166 | test_dataset: 167 | config: 168 | use_smpl_verts: 169 | 170 | Returns: 171 | 172 | """ 173 | with torch.no_grad(): 174 | frame_id = torch.arange(num_train_data, dtype=torch.float, device="cuda") 175 | trajectory = model.joint_trajectory(frame_id) 176 | 177 | rotation, translation = trajectory 178 | video_len = rotation.shape[0] 179 | 180 | child_root, self_root = model.decoder.joint_root_locations(rotation, translation) 181 | estimated_keypoints = torch.cat([child_root, self_root], dim=-1) 182 | 183 | estimated_keypoints = estimated_keypoints.permute(0, 1, 3, 2) 184 | 185 | if use_smpl_verts: 186 | smpl_verts_path = os.path.join(config.dataset.data_root, "smpl_verts.pickle") 187 | with open(smpl_verts_path, "rb") as f: 188 | smpl_keypoints = pickle.load(f)["smpl_verts"] 
189 | smpl_keypoints = smpl_keypoints / 1.5 190 | smpl_keypoints = torch.tensor(smpl_keypoints, dtype=torch.float) 191 | smpl_translation = smpl_keypoints # (L, n_verts, 3) 192 | smpl_keypoints = smpl_keypoints[:num_train_data].cpu() 193 | else: 194 | smpl_pose = torch.tensor(test_dataset.video_cache["smpl_pose"], device="cpu", dtype=torch.float).clone() 195 | smpl_pose[:, :, :3, 3] /= 1.5 196 | smpl_keypoints = smpl_pose[:num_train_data, :, :3, 3] 197 | 198 | smpl_translation = smpl_pose[:, :, :3, 3] # (L, 22, 3) 199 | 200 | _estimated_keypoints = estimated_keypoints.reshape(video_len, -1, 3).permute(0, 2, 1).reshape(video_len * 3, 201 | -1).cpu() 202 | _smpl_keypoints = smpl_keypoints.permute(0, 2, 1).reshape(video_len * 3, -1) 203 | 204 | lam = 5e-1 if use_smpl_verts else 1e-1 205 | s2j_mapping = torch.inverse( 206 | _smpl_keypoints.T.matmul(_smpl_keypoints) + torch.eye(_smpl_keypoints.shape[1]) * lam).matmul( 207 | _smpl_keypoints.T).matmul(_estimated_keypoints) 208 | 209 | regressed = s2j_mapping.T @ smpl_translation # (L, 3, 140) 210 | 211 | # canonical pose 212 | with torch.no_grad(): 213 | child_root_can, self_root_can = model.decoder.joint_root_locations(torch.eye(3, device="cuda", 214 | dtype=torch.float), 215 | torch.zeros(model.num_parts, 3, 1, 216 | device="cuda", 217 | dtype=torch.float)) 218 | estimated_keypoints_can = torch.cat([child_root_can, self_root_can], dim=-1) 219 | estimated_keypoints_can = estimated_keypoints_can.permute(0, 2, 1).cpu() 220 | 221 | regressed_translation = regressed.reshape(regressed.shape[0], model.num_parts, 7, 3) 222 | U, S, Vh = torch.linalg.svd(regressed_translation.permute(0, 1, 3, 2) @ estimated_keypoints_can) 223 | R = Vh.permute(0, 1, 3, 2) @ U.permute(0, 1, 3, 2) 224 | det = torch.linalg.det(R) 225 | Vh[:, :, 2] = Vh[:, :, 2] * det[:, :, None] 226 | R = Vh.permute(0, 1, 3, 2) @ U.permute(0, 1, 3, 2) 227 | joint_rotation = R.permute(0, 1, 3, 2).cuda() 228 | joint_translation = regressed_translation[:, :, -1, :, None].cuda() 229 | 230 | return joint_rotation, joint_translation # (L, num_parts, 3, 3) 231 | 232 | 233 | if __name__ == "__main__": 234 | # evaluate novel view and novel pose reconstruction 235 | # novel view -> learned pose, new camera 236 | # novel pose -> novel pose, all camera. Requires smpl regression 237 | parser = argparse.ArgumentParser(description='Save reconstructed images') 238 | parser.add_argument('--exp_name', action='append', required=True) 239 | args = parser.parse_args() 240 | exp_names = args.exp_name 241 | 242 | default_config = "confs/default.yml" 243 | for exp_name in exp_names: 244 | config_path = f"confs/{exp_name}.yml" 245 | test(config_path, default_config, mode="test") # novel view 246 | test(config_path, default_config, mode="novel_pose") 247 | -------------------------------------------------------------------------------- /src/visualize/create_reconstruction_video.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. 
Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | 13 | import argparse 14 | import sys 15 | 16 | sys.path.append(".") 17 | from utils.visualization_utils import GenerateVideoFromConfig 18 | 19 | if __name__ == "__main__": 20 | parser = argparse.ArgumentParser(description='Save reconstruction video') 21 | parser.add_argument('--exp_name', action='append', required=True) 22 | parser.add_argument('--camera_id', type=int, default=0) 23 | parser.add_argument('--num_video_frames', type=int, default=50) 24 | args = parser.parse_args() 25 | 26 | exp_names = args.exp_name 27 | default_config = "confs/default.yml" 28 | camera_id = args.camera_id 29 | num_video_frames = args.num_video_frames 30 | 31 | for exp_name in exp_names: 32 | config_path = f"confs/{exp_name}.yml" 33 | generate_video_from_conf = GenerateVideoFromConfig(config_path, default_config) 34 | generate_video_from_conf(rotate=True, increment=True, camera_id=camera_id, num_video_frames=num_video_frames) 35 | -------------------------------------------------------------------------------- /src/visualize/create_repose_video.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited.
10 | """ 11 | 12 | import argparse 13 | import sys 14 | 15 | import yaml 16 | from easydict import EasyDict as edict 17 | 18 | sys.path.append(".") 19 | from utils.visualization_utils import GenerateVideoFromConfig 20 | 21 | if __name__ == "__main__": 22 | parser = argparse.ArgumentParser(description='Manual re-posing') 23 | parser.add_argument('--exp_name', required=True, type=str) 24 | parser.add_argument('--repose_config', required=True, type=str) 25 | parser.add_argument('--rotate', action="store_true") 26 | parser.add_argument('--num_video_frames', type=int, default=20) 27 | parser.add_argument('--iteration', type=int, default=-1) 28 | 29 | args = parser.parse_args() 30 | 31 | config_path = f"confs/{args.exp_name}.yml" 32 | 33 | default_config = "confs/default.yml" 34 | 35 | repose_config = edict(yaml.load(open(args.repose_config), Loader=yaml.SafeLoader)) 36 | frame_id = repose_config.frame_id 37 | camera_id = repose_config.camera_id 38 | root = repose_config.root 39 | first = repose_config.first 40 | second = repose_config.second 41 | rotate = args.rotate 42 | num_video_frames = args.num_video_frames 43 | iteration = args.iteration 44 | 45 | generate_vide_from_conf = GenerateVideoFromConfig(config_path, default_config, iteration=iteration) 46 | generate_vide_from_conf.repose(root, first, second, frame_id, camera_id, 47 | rotate=rotate, num_video_frames=num_video_frames) 48 | -------------------------------------------------------------------------------- /src/visualize/part_merging.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 
10 | """ 11 | 12 | import argparse 13 | import io 14 | import os 15 | import sys 16 | from copy import deepcopy 17 | from typing import List, Tuple 18 | 19 | import cv2 20 | import matplotlib.pyplot as plt 21 | import numpy as np 22 | import torch 23 | 24 | sys.path.append(".") 25 | from utils.visualization_utils import GenerateVideoFromConfig, render_and_get_joints 26 | 27 | 28 | def draw_polygon(joints: np.ndarray, color: str) -> None: 29 | """ 30 | 31 | Args: 32 | joints: (n, 2) 33 | color: 34 | 35 | Returns: 36 | 37 | """ 38 | indices = [] 39 | 40 | visited = np.zeros(len(joints)) 41 | current_idx = joints[:, 0].argmin() 42 | current_angle = np.pi / 2 43 | indices.append(current_idx) 44 | 45 | while visited[current_idx] == 0: 46 | visited[current_idx] = 1 47 | vec = joints - joints[current_idx] 48 | angle = np.arctan2(vec[:, 1], vec[:, 0]) 49 | rel_angle = (current_angle - angle) % (2 * np.pi) 50 | rel_angle[current_idx] = 1e4 # ignore self 51 | current_idx = rel_angle.argmin() 52 | current_angle = angle[current_idx] 53 | indices.append(current_idx) 54 | indices.append(indices[0]) 55 | 56 | plt.plot(joints[indices, 0], 57 | joints[indices, 1], c=f"#{color}") 58 | 59 | 60 | def main(config_path: str, default_config: str, cam_id: int) -> None: 61 | """ 62 | 63 | Args: 64 | config_path: 65 | default_config: 66 | cam_id: 67 | 68 | Returns: 69 | 70 | """ 71 | self = GenerateVideoFromConfig(config_path, default_config, args.iteration) 72 | model = self.model 73 | train_loader = self.train_loader 74 | 75 | with torch.no_grad(): 76 | frame_id = torch.arange(train_loader.dataset.num_frames, dtype=torch.float, device="cuda") 77 | trajectory = model.joint_trajectory(frame_id) 78 | 79 | rotation, translation = trajectory 80 | relative_rotation = torch.matmul(rotation[:, :, None].transpose(-1, -2), rotation[:, None]) 81 | relative_translation = torch.matmul(rotation[:, :, None].transpose(-1, -2), 82 | translation[:, None] - translation[:, :, None]) 83 | mat = relative_rotation.std(dim=0).mean(dim=(2, 3)) + relative_translation.std(dim=0).mean(dim=(2, 3)) * 3 84 | mat = mat + mat.transpose(0, 1) 85 | mat = mat 86 | 87 | asort = (mat + torch.eye(model.num_parts, device="cuda") * 1e10).reshape(-1).argsort() 88 | merged = torch.stack([torch.div(asort, model.num_parts, rounding_mode='trunc'), 89 | asort % model.num_parts], dim=1)[::2] 90 | val = mat[merged[:, 0], merged[:, 1]] 91 | for i in range(len(merged[:10])): 92 | print(merged[i].cpu().numpy(), val[i].item()) 93 | 94 | frame_id = 0 95 | color_each_view = [] 96 | mask_each_view = [] 97 | segmentation_each_view = [] 98 | gt_img_each_view = [] 99 | gt_mask_each_view = [] 100 | joint_2d_each_view = [] 101 | background_each_view = [] 102 | disparity_each_view = [] 103 | child_root_each_view = [] 104 | self_root_each_view = [] 105 | 106 | (img, gt_mask, color, mask, disparity, segmentation, joint_2d, child_root, 107 | self_root, background) = render_and_get_joints(model, train_loader, frame_id, cam_id) 108 | color_each_view.append(color) 109 | mask_each_view.append(mask) 110 | segmentation_each_view.append(segmentation) 111 | gt_img_each_view.append(img) 112 | gt_mask_each_view.append(gt_mask) 113 | joint_2d_each_view.append(joint_2d) 114 | background_each_view.append(background) 115 | disparity_each_view.append(disparity) 116 | child_root_each_view.append(child_root) 117 | self_root_each_view.append(self_root) 118 | 119 | num_prune = 40 120 | 121 | i = 0 122 | 123 | joint_2d = joint_2d_each_view[i][0, :, :] 124 | child_root = child_root_each_view[i] 
125 | 126 | joint_connection = model.joint_connection.cpu().numpy() 127 | child_ids = model.child_ids.cpu().numpy() 128 | 129 | new_to_old_ = {_: [_] for _ in range(model.num_parts)} 130 | old_to_new_ = {_: _ for _ in range(model.num_parts)} 131 | 132 | def create_figures(text: bool) -> Tuple[List[np.ndarray], List[np.ndarray]]: 133 | """ 134 | 135 | Args: 136 | text: 137 | 138 | Returns: 139 | 140 | """ 141 | old_to_new = deepcopy(old_to_new_) 142 | new_to_old = deepcopy(new_to_old_) 143 | joint_figure = [] 144 | center_figure = [] 145 | fig_id = 0 146 | for _ in range(num_prune): 147 | should_merge = True 148 | if _ > 0: 149 | merged_idx = merged[_ - 1].cpu().numpy() 150 | From = np.max(merged_idx) 151 | To = np.min(merged_idx) 152 | if old_to_new[To] != old_to_new[From]: 153 | new_to_old[old_to_new[To]] += new_to_old[old_to_new[From]].copy() 154 | new_to_old[old_to_new[From]] = [] 155 | old_to_new = {} 156 | for ii in range(model.num_parts): 157 | connected_to_ii = new_to_old[ii] 158 | for jj in connected_to_ii: 159 | old_to_new[jj] = ii 160 | else: 161 | should_merge = False 162 | 163 | if should_merge: 164 | fig_id += 1 165 | out = 1 - mask_each_view[i].cpu().numpy()[:, :, None][:, :, [0, 0, 0]] / 2 166 | plt.imshow(out, vmin=0, vmax=1, alpha=0.2) 167 | joint_location = (child_root[0, joint_connection[:, 0], :, child_ids[:, 0]] + 168 | child_root[0, joint_connection[:, 1], :, child_ids[:, 1]]) / 2 169 | new_joint_connection = np.array([[old_to_new[jc[0]], old_to_new[jc[1]]] for jc in joint_connection]) 170 | for j in range(model.num_parts): 171 | if len(new_to_old[j]) > 0: 172 | joints = joint_location[np.where((new_joint_connection == old_to_new[j]) & ( 173 | new_joint_connection[:, :1] != new_joint_connection[:, 1:]))[0]] 174 | if len(joints) > 1: 175 | color = format(j * 600000, '06x') 176 | draw_polygon(joints, color) 177 | elif len(joints) == 1: 178 | plt.plot([joints[0, 0], joint_2d[j, 0]], 179 | [joints[0, 1], joint_2d[j, 1]], c="b") 180 | else: 181 | break 182 | if text: 183 | plt.text(joint_2d[j, 0], joint_2d[j, 1], j, fontsize="small") 184 | 185 | plt.axis("off") 186 | plt.subplots_adjust(left=0, right=1, bottom=0, top=1) 187 | 188 | buf = io.BytesIO() 189 | plt.savefig(buf, format='png', dpi=150) 190 | enc = np.frombuffer(buf.getvalue(), dtype=np.uint8) 191 | dst = cv2.imdecode(enc, 1)[:, :, ::-1] 192 | joint_figure.append(dst) 193 | plt.clf() 194 | 195 | out = 1 - mask_each_view[i].cpu().numpy()[:, :, None][:, :, [0, 0, 0]] / 2 196 | plt.imshow(out, vmin=0, vmax=1, alpha=0.2) 197 | 198 | new_joint_2d = [np.mean(joint_2d[new_to_old[_]], axis=0) if len(new_to_old[_]) > 0 else None 199 | for _ in range(model.num_parts)] 200 | for j in range(model.num_parts): 201 | if len(new_to_old[j]) > 0: 202 | if text: 203 | plt.text(new_joint_2d[old_to_new[j]][0], new_joint_2d[old_to_new[j]][1], j, 204 | fontsize="small") 205 | if j == model.num_parts - 1: 206 | break 207 | for njc in new_joint_connection: 208 | if njc[0] != njc[1]: 209 | plt.plot([new_joint_2d[njc[0]][0], new_joint_2d[njc[1]][0]], 210 | [new_joint_2d[njc[0]][1], new_joint_2d[njc[1]][1]]) 211 | 212 | plt.axis("off") 213 | plt.subplots_adjust(left=0, right=1, bottom=0, top=1) 214 | 215 | buf = io.BytesIO() 216 | plt.savefig(buf, format='png', dpi=150) 217 | enc = np.frombuffer(buf.getvalue(), dtype=np.uint8) 218 | dst = cv2.imdecode(enc, 1)[:, :, ::-1] 219 | center_figure.append(dst) 220 | plt.clf() 221 | 222 | return joint_figure, center_figure 223 | 224 | joint_figure, center_figure = create_figures(text=True) 225 | 
os.makedirs(f"{self.save_dir}/merge", exist_ok=True) 226 | for idx, jf in enumerate(joint_figure): 227 | cv2.imwrite(f"{self.save_dir}/merge/joints_{idx:0>4}.png", jf) 228 | for idx, cf in enumerate(center_figure): 229 | cv2.imwrite(f"{self.save_dir}/merge/centers_{idx:0>4}.png", cf) 230 | 231 | joint_figure, center_figure = create_figures(text=False) 232 | os.makedirs(f"{self.save_dir}/merge", exist_ok=True) 233 | for idx, jf in enumerate(joint_figure): 234 | cv2.imwrite(f"{self.save_dir}/merge/joints_notext_{idx:0>4}.png", jf) 235 | for idx, cf in enumerate(center_figure): 236 | cv2.imwrite(f"{self.save_dir}/merge/centers_notext_{idx:0>4}.png", cf) 237 | 238 | 239 | if __name__ == "__main__": 240 | parser = argparse.ArgumentParser(description='Part merging') 241 | parser.add_argument('--exp_name', required=True, type=str) 242 | parser.add_argument('--camera_id', required=True, type=int) 243 | parser.add_argument('--iteration', default=-1, type=int) 244 | args = parser.parse_args() 245 | 246 | exp_name = args.exp_name 247 | camera_id = args.camera_id 248 | config_path = f"confs/{exp_name}.yml" 249 | 250 | default_config = "confs/default.yml" 251 | 252 | main(config_path, default_config, camera_id) 253 | -------------------------------------------------------------------------------- /src/visualize/repose_configs/cassie.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | 10 | 11 | camera_id: 3 12 | frame_id: 0 13 | root: 8 14 | first: [ [ 0, [ 0, -2.0943, 0 ] ], [ 6, [ 0, 1.0471, 0 ] ] ] 15 | second: [ [ 3, [ 0, -2.0943, 0 ] ], [ 4, [ 0, 1.0471, 0 ] ] ] -------------------------------------------------------------------------------- /src/visualize/repose_configs/iiwa.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | 10 | 11 | camera_id: 1 12 | frame_id: 0 13 | root: 7 14 | first: [ [ 6, [ 0, -2.0943, 0 ] ] ] 15 | second: [ [ 5, [ 0, 2.0943, 0 ] ] ] -------------------------------------------------------------------------------- /src/visualize/repose_configs/spot.yml: -------------------------------------------------------------------------------- 1 | #SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | #SPDX-License-Identifier: LicenseRef-NvidiaProprietary 3 | #NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | #property and proprietary rights in and to this material, related 5 | #documentation and any modifications thereto. Any use, reproduction, 6 | #disclosure or distribution of this material and related documentation 7 | #without an express license agreement from NVIDIA CORPORATION or 8 | #its affiliates is strictly prohibited. 9 | 10 | 11 | camera_id: 0 12 | frame_id: 0 13 | root: 2 14 | first: [ [ 0, [ 0, -2.0943, 0 ] ],[ 4, [ 0, 1.0471, 0 ] ],[ 7, [ 0, -2.0943, 0 ] ],[ 11, [ 0, 1.0471, 0 ] ] ] 15 | second: [ [ 12, [ 0, -2.0943, 0 ] ],[ 8, [ 0, 1.0471, 0 ] ],[ 3, [ 0, -2.0943, 0 ] ],[ 9, [ 0, 1.0471, 0 ] ] ] -------------------------------------------------------------------------------- /src/visualize/repose_person_by_driving_pose.py: -------------------------------------------------------------------------------- 1 | """ 2 | SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | SPDX-License-Identifier: LicenseRef-NvidiaProprietary 4 | NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | property and proprietary rights in and to this material, related 6 | documentation and any modifications thereto. Any use, reproduction, 7 | disclosure or distribution of this material and related documentation 8 | without an express license agreement from NVIDIA CORPORATION or 9 | its affiliates is strictly prohibited. 10 | """ 11 | 12 | import argparse 13 | import os 14 | import pickle 15 | import sys 16 | from typing import List 17 | 18 | import cv2 19 | import numpy as np 20 | import torch 21 | from easydict import EasyDict as edict 22 | from torch import nn 23 | from torch.utils.data import DataLoader 24 | from tqdm import tqdm 25 | 26 | sys.path.append(".") 27 | from models.model import SingleVideoPartDecomposition 28 | from utils.get_args import get_args_jupyter 29 | from utils.train_utils import to_gpu, create_dataloaders 30 | 31 | 32 | def generate_video_from_pose(model: nn.Module, train_loader: DataLoader, num_video_frames: int, camera_id: int, 33 | joint_rotation: torch.Tensor, joint_translation: torch.Tensor, 34 | rotate: bool = False, frame_id: int = 0): 35 | """ 36 | 37 | Args: 38 | model: 39 | train_loader: 40 | num_video_frames: 41 | camera_id: 42 | joint_rotation: 43 | joint_translation: 44 | rotate: 45 | frame_id: 46 | 47 | Returns: 48 | 49 | """ 50 | num_frames = train_loader.dataset.num_frames 51 | minibatch = train_loader.dataset[camera_id * num_frames] 52 | minibatch = {k: torch.tensor(v) for k, v in minibatch.items()} 53 | minibatch = to_gpu(minibatch) 54 | camera_rotation = minibatch["camera_rotation"][None] 55 | camera_translation = minibatch["camera_translation"][None] 56 | inv_intrinsics = torch.inverse(minibatch["camera_intrinsic"])[None] 57 | img = minibatch["img"].cpu().numpy() 58 | 59 | video = [] 60 | 61 | model.eval() 62 | rotate_angle = 0 63 | frame_interval = 5 64 | with torch.no_grad(): 65 | for i in tqdm(range(num_video_frames)): 66 | frame_id += frame_interval 67 | if rotate: 68 | rotate_angle = i / 20 * (2 * np.pi) 69 | if frame_id >= len(joint_translation): 70 | break 71 | out_dict = model.render_entire_img(None, camera_rotation, camera_translation, 72 | inv_intrinsics, segmentation_label=False, 73 | rotate_angle=rotate_angle, ray_batchsize=10000, 74 | part_pose=(joint_rotation[frame_id], joint_translation[frame_id])) 75 | color = out_dict["rendered_colors"] 
76 | mask = out_dict["rendered_masks"] 77 | segmentation = out_dict["segmentation_colors"] 78 | 79 | color = (color + (1 - mask[None])).cpu().numpy().transpose(1, 2, 0) 80 | segmentation = (segmentation + (1 - mask[None])).cpu().numpy().transpose(1, 2, 0) 81 | color = np.concatenate([img.transpose(1, 2, 0), color, segmentation], axis=1) 82 | video.append(np.clip(color * 127.5 + 127.5, 0, 255).astype("uint8")) 83 | 84 | return video 85 | 86 | 87 | def save_video(frames: List[np.ndarray], file_name: str, fps: int = 10, n_repeat: int = 10): 88 | """ 89 | 90 | Args: 91 | frames: 92 | file_name: 93 | fps: 94 | n_repeat: 95 | 96 | Returns: 97 | 98 | """ 99 | size = (frames[0].shape[-2], frames[0].shape[-3]) 100 | 101 | fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 102 | writer = cv2.VideoWriter(file_name, fmt, fps, size) 103 | 104 | for i in range(n_repeat): 105 | for frame in frames: 106 | writer.write(frame[:, :, ::-1]) 107 | 108 | writer.release() 109 | 110 | 111 | def save_png(frames: List[np.ndarray], dir_name: str): 112 | """ 113 | 114 | Args: 115 | frames: 116 | dir_name: 117 | 118 | Returns: 119 | 120 | """ 121 | os.makedirs(dir_name, exist_ok=True) 122 | for i, frame in enumerate(frames): 123 | img_size = frame.shape[0] 124 | cv2.imwrite(f'{dir_name}/gt_{i:0>5}.png', frame[:, :img_size, ::-1]) 125 | cv2.imwrite(f'{dir_name}/gen_{i:0>5}.png', frame[:, img_size:img_size * 2, ::-1]) 126 | cv2.imwrite(f'{dir_name}/seg_{i:0>5}.png', frame[:, img_size * 2:, ::-1]) 127 | 128 | 129 | def get_mapping(model: nn.Module, config: edict, train_loader: DataLoader, use_smpl_verts: bool) -> torch.Tensor: 130 | """ 131 | 132 | Args: 133 | model: 134 | config: 135 | train_loader: 136 | use_smpl_verts: 137 | 138 | Returns: 139 | 140 | """ 141 | with torch.no_grad(): 142 | num_frames = config.dataset.num_frames 143 | coordinate_scale = train_loader.dataset.coordinate_scale 144 | frame_id = torch.arange(config.dataset.num_frames, dtype=torch.float, device="cuda") 145 | trajectory = model.joint_trajectory(frame_id) 146 | 147 | rotation, translation = trajectory 148 | video_len = rotation.shape[0] 149 | 150 | child_root, self_root = model.decoder.joint_root_locations(rotation, translation) 151 | estimated_keypoints = torch.cat([child_root, self_root], dim=-1) 152 | 153 | estimated_keypoints = estimated_keypoints.permute(0, 1, 3, 2) 154 | 155 | if use_smpl_verts: 156 | smpl_verts_path = os.path.join(config.dataset.data_root, "smpl_verts.pickle") 157 | with open(smpl_verts_path, "rb") as f: 158 | smpl_keypoints = pickle.load(f)["smpl_verts"] 159 | smpl_keypoints = smpl_keypoints / coordinate_scale 160 | smpl_keypoints = torch.tensor(smpl_keypoints, device=rotation.device, dtype=torch.float) 161 | smpl_keypoints = smpl_keypoints[:num_frames] 162 | 163 | else: 164 | smpl_pose = torch.tensor(train_loader.dataset.video_cache["smpl_pose"], device=rotation.device, 165 | dtype=torch.float).clone() 166 | smpl_pose[:, :, :3, 3] /= coordinate_scale 167 | 168 | smpl_keypoints = smpl_pose[:num_frames, :, :3].clone() 169 | smpl_keypoints[:, :, :3, :3] *= 0.1 170 | smpl_keypoints[:, :, :3, :3] += smpl_keypoints[:, :, :3, 3:] 171 | smpl_keypoints = smpl_keypoints.permute(0, 1, 3, 2).reshape(video_len, -1, 3) 172 | 173 | _estimated_keypoints = estimated_keypoints.reshape(video_len, -1, 3).permute(0, 2, 1).reshape(video_len * 3, 174 | -1).cpu() 175 | _smpl_keypoints = smpl_keypoints.permute(0, 2, 1).reshape(video_len * 3, -1).cpu() 176 | 177 | lam = 5e-1 if use_smpl_verts else 1e-1 178 | s2j_mapping = torch.inverse( 179 | 
_smpl_keypoints.T.matmul(_smpl_keypoints) + torch.eye(_smpl_keypoints.shape[1]) * lam).matmul( 180 | _smpl_keypoints.T).matmul(_estimated_keypoints) 181 | 182 | return s2j_mapping 183 | 184 | 185 | def repose(config_path: str, default_config: str, driving_person_id: int, num_video_frames: int, use_smpl_verts: bool 186 | ) -> None: 187 | """ 188 | 189 | Args: 190 | config_path: 191 | default_config: 192 | driving_person_id: 193 | num_video_frames: 194 | use_smpl_verts: use smpl vertices for regression or not 195 | 196 | Returns: 197 | 198 | """ 199 | args, config = get_args_jupyter(config_path, default_config) 200 | config.dataset.batchsize = 1 201 | 202 | train_loader = create_dataloaders(config.dataset, shuffle=True) 203 | 204 | train_loader.dataset.n_repetition_in_epoch = 1 205 | train_loader.dataset.color_augmentation = False 206 | train_loader.dataset.camera_dir_augmentation = False 207 | train_loader.dataset.background_color = 1 208 | 209 | out_dir = config.output_dir 210 | exp_name = config.exp_name 211 | 212 | same_person = driving_person_id == -1 213 | 214 | # model 215 | model = SingleVideoPartDecomposition(config.network_params) 216 | model.cuda() 217 | 218 | save_dir = os.path.join(out_dir, "result", exp_name) 219 | model_path = os.path.join(save_dir, "snapshot_latest.pth") 220 | if os.path.exists(model_path): 221 | snapshot = torch.load(model_path) 222 | state_dict = snapshot["model"] 223 | model.load_state_dict(state_dict, strict=False) 224 | else: 225 | print("model is not loaded") 226 | 227 | train_loader.dataset.current_max_frame_id = train_loader.dataset.num_frames 228 | 229 | num_frames = train_loader.dataset.num_frames 230 | coordinate_scale = train_loader.dataset.coordinate_scale 231 | 232 | s2j_mapping = get_mapping(model, config, train_loader, use_smpl_verts) 233 | 234 | if use_smpl_verts: 235 | if same_person: 236 | data_root = config.dataset.data_root 237 | else: 238 | person_id = str(driving_person_id) 239 | data_root = f"../data/zju_mocap/cache512/{person_id}/" 240 | smpl_verts_path = os.path.join(data_root, "smpl_verts.pickle") 241 | with open(smpl_verts_path, "rb") as f: 242 | smpl_keypoints = pickle.load(f)["smpl_verts"] 243 | smpl_keypoints = smpl_keypoints / coordinate_scale 244 | smpl_translation = torch.tensor(smpl_keypoints, dtype=torch.float) 245 | else: 246 | if same_person: 247 | smpl_pose = torch.tensor(train_loader.dataset.video_cache["smpl_pose"], dtype=torch.float).clone() 248 | else: 249 | # read other smpl sequence 250 | person_id = str(driving_person_id) 251 | with open(f"../data/zju_mocap/cache512/{person_id}/cache_train.pickle", "rb") as f: 252 | smpl_pose = torch.tensor(pickle.load(f)["smpl_pose"], dtype=torch.float).clone() 253 | 254 | smpl_pose[:, :, :3, 3] /= coordinate_scale 255 | 256 | smpl_translation = smpl_pose[:, :, :3].clone() 257 | smpl_translation[:, :, :3, :3] *= 0.1 258 | smpl_translation[:, :, :3, :3] += smpl_translation[:, :, :3, 3:] 259 | smpl_translation = smpl_translation.permute(0, 1, 3, 2).reshape(-1, 23 * 4, 3) 260 | 261 | estimated_367_translation = s2j_mapping.T @ smpl_translation # (L, 140, 3) 262 | 263 | # org pose 264 | with torch.no_grad(): 265 | child_root_org, self_root_org = model.decoder.joint_root_locations(torch.eye(3, device="cuda", 266 | dtype=torch.float), 267 | torch.zeros(model.num_parts, 3, 1, 268 | device="cuda", 269 | dtype=torch.float)) 270 | estimated_keypoints_org = torch.cat([child_root_org, self_root_org], dim=-1) 271 | estimated_keypoints_org = estimated_keypoints_org.permute(0, 2, 1).cpu() 272 | 
273 | estimated_367_translation = estimated_367_translation.reshape(estimated_367_translation.shape[0], model.num_parts, 274 | 7, 3) 275 | estimated_367_translation_centered = estimated_367_translation - estimated_367_translation[:, :, -1:] 276 | U, S, Vh = torch.linalg.svd(estimated_367_translation_centered.permute(0, 1, 3, 2) @ estimated_keypoints_org) 277 | R = Vh.permute(0, 1, 3, 2) @ U.permute(0, 1, 3, 2) 278 | det = torch.linalg.det(R) 279 | Vh[:, :, 2] = Vh[:, :, 2] * det[:, :, None] 280 | R = Vh.permute(0, 1, 3, 2) @ U.permute(0, 1, 3, 2) 281 | 282 | frame_id = num_frames if same_person else 0 283 | camera_id = 0 284 | rotate = False 285 | 286 | video = generate_video_from_pose(model, train_loader, num_video_frames, camera_id, 287 | rotate=rotate, 288 | joint_rotation=R.permute(0, 1, 3, 2).cuda(), 289 | joint_translation=estimated_367_translation[:, :, -1, :, None].cuda(), 290 | frame_id=frame_id) 291 | save_video(video, os.path.join( 292 | save_dir, 293 | 'drive_' + 'verts_' * use_smpl_verts + f'{driving_person_id}' * (not same_person) + f'_{camera_id}.mp4')) 294 | 295 | save_png(video, os.path.join( 296 | save_dir, 297 | 'drive_' + 'verts_' * use_smpl_verts + f'{driving_person_id}' * (not same_person) + f'_{camera_id}')) 298 | 299 | 300 | if __name__ == "__main__": 301 | parser = argparse.ArgumentParser(description='Create reposing videos with test poses') 302 | parser.add_argument('--exp_name', required=True, type=str) 303 | parser.add_argument('--camera_id', required=True, type=int) 304 | parser.add_argument('--num_video_frames', type=int, default=100) 305 | parser.add_argument('--driving_person_id', type=int, default=-1, 306 | help="Driving person id is same as input person id if -1") 307 | args = parser.parse_args() 308 | 309 | exp_name = args.exp_name 310 | num_video_frames = args.num_video_frames 311 | driving_person_id = args.driving_person_id 312 | 313 | default_config = "confs/default.yml" 314 | 315 | config_path = f"confs/{exp_name}.yml" 316 | repose(config_path, default_config, driving_person_id, num_video_frames, use_smpl_verts=True) 317 | --------------------------------------------------------------------------------