├── README.md
├── checkpoints
│   ├── init_deform_deform_cond_pe8.pth
│   └── script
│       ├── midpoint.mlx
│       ├── midpoint_head.mlx
│       ├── remesh.mlx
│       ├── remesh_bac.mlx
│       └── wt.mlx
├── configs
│   └── f3c.json
├── dataset
│   ├── __pycache__
│   │   ├── dataset.cpython-38.pyc
│   │   └── dataset_split.cpython-38.pyc
│   ├── dataset.py
│   └── dataset_split.py
├── deform
│   ├── __pycache__
│   │   └── smplx_exavatar_deformer.cpython-38.pyc
│   ├── smplx_exavatar
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── body_models.cpython-38.pyc
│   │   │   ├── lbs.cpython-38.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── vertex_ids.cpython-38.pyc
│   │   │   └── vertex_joint_selector.cpython-38.pyc
│   │   ├── body_models.py
│   │   ├── joint_names.py
│   │   ├── lbs.py
│   │   ├── utils.py
│   │   ├── vertex_ids.py
│   │   └── vertex_joint_selector.py
│   └── smplx_exavatar_deformer.py
├── denoiser
│   ├── __pycache__
│   │   └── denoiser.cpython-38.pyc
│   └── denoiser.py
├── figs
│   └── pipe.jpg
├── geometry
│   ├── __pycache__
│   │   ├── embedding.cpython-38.pyc
│   │   ├── gshell_tets.cpython-38.pyc
│   │   ├── hmsdf.cpython-38.pyc
│   │   ├── hmsdf_tets_split.cpython-38.pyc
│   │   └── mlp.cpython-38.pyc
│   ├── embedding.py
│   ├── gshell_tets.py
│   ├── hmsdf.py
│   ├── hmsdf_tets_split.py
│   └── mlp.py
├── lap_loss.py
├── render
│   ├── __pycache__
│   │   ├── light.cpython-38.pyc
│   │   ├── material.cpython-38.pyc
│   │   ├── mesh.cpython-38.pyc
│   │   ├── mlptexture.cpython-38.pyc
│   │   ├── obj.cpython-38.pyc
│   │   ├── regularizer.cpython-38.pyc
│   │   ├── render.cpython-38.pyc
│   │   ├── render_mask.cpython-38.pyc
│   │   ├── texture.cpython-38.pyc
│   │   └── util.cpython-38.pyc
│   ├── light.py
│   ├── material.py
│   ├── mesh.py
│   ├── mlptexture.py
│   ├── obj.py
│   ├── optixutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── build
│   │   │   ├── build.ninja
│   │   │   ├── denoising.cuda.o
│   │   │   ├── optix_wrapper.o
│   │   │   ├── optixutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── accessor.h
│   │   │   ├── bsdf.h
│   │   │   ├── common.h
│   │   │   ├── denoising.cu
│   │   │   ├── denoising.h
│   │   │   ├── envsampling
│   │   │   │   ├── kernel.cu
│   │   │   │   └── params.h
│   │   │   ├── math_utils.h
│   │   │   ├── optix_wrapper.cpp
│   │   │   ├── optix_wrapper.h
│   │   │   └── torch_bindings.cpp
│   │   ├── include
│   │   │   ├── internal
│   │   │   │   ├── optix_7_device_impl.h
│   │   │   │   ├── optix_7_device_impl_exception.h
│   │   │   │   └── optix_7_device_impl_transformations.h
│   │   │   ├── optix.h
│   │   │   ├── optix_7_device.h
│   │   │   ├── optix_7_host.h
│   │   │   ├── optix_7_types.h
│   │   │   ├── optix_denoiser_tiling.h
│   │   │   ├── optix_device.h
│   │   │   ├── optix_function_table.h
│   │   │   ├── optix_function_table_definition.h
│   │   │   ├── optix_host.h
│   │   │   ├── optix_stack_size.h
│   │   │   ├── optix_stubs.h
│   │   │   └── optix_types.h
│   │   ├── ops.py
│   │   └── tests
│   │       └── filter_test.py
│   ├── regularizer.py
│   ├── render.py
│   ├── render_mask.py
│   ├── renderutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── bsdf.cpython-38.pyc
│   │   │   ├── loss.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── bsdf.py
│   │   ├── build
│   │   │   ├── bsdf.cuda.o
│   │   │   ├── build.ninja
│   │   │   ├── common.o
│   │   │   ├── cubemap.cuda.o
│   │   │   ├── loss.cuda.o
│   │   │   ├── mesh.cuda.o
│   │   │   ├── normal.cuda.o
│   │   │   ├── renderutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── bsdf.cu
│   │   │   ├── bsdf.h
│   │   │   ├── common.cpp
│   │   │   ├── common.h
│   │   │   ├── cubemap.cu
│   │   │   ├── cubemap.h
│   │   │   ├── loss.cu
│   │   │   ├── loss.h
│   │   │   ├── mesh.cu
│   │   │   ├── mesh.h
│   │   │   ├── normal.cu
│   │   │   ├── normal.h
│   │   │   ├── tensor.h
│   │   │   ├── torch_bindings.cpp
│   │   │   ├── vec3f.h
│   │   │   └── vec4f.h
│   │   ├── loss.py
│   │   ├── ops.py
│   │   └── tests
│   │       ├── test_bsdf.py
│   │       ├── test_loss.py
│   │       ├── test_mesh.py
│   │       └── test_perf.py
│   ├── texture.py
│   └── util.py
├── script
│   ├── __pycache__
│   │   ├── connet_face_head.cpython-38.pyc
│   │   ├── get_tet_smpl.cpython-38.pyc
│   │   └── process_body_cloth_head_msdfcut.cpython-38.pyc
│   ├── connet_face_head.py
│   ├── get_tet_smpl.py
│   └── process_body_cloth_head_msdfcut.py
├── ssim_loss.py
├── third_parties
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── lpips
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-38.pyc
│   │   ├── lpips.py
│   │   ├── pretrained_networks.py
│   │   ├── trainer.py
│   │   └── weights
│   │       ├── v0.0
│   │       │   ├── alex.pth
│   │       │   ├── squeeze.pth
│   │       │   └── vgg.pth
│   │       └── v0.1
│   │           ├── alex.pth
│   │           ├── squeeze.pth
│   │           └── vgg.pth
│   └── pytorch3d
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   └── ops.cpython-38.pyc
│       ├── cuda
│       │   ├── knn.cpp
│       │   ├── knn.cu
│       │   ├── knn_cpu.cpp
│       │   └── utils
│       │       ├── dispatch.cuh
│       │       ├── index_utils.cuh
│       │       ├── mink.cuh
│       │       └── pytorch3d_cutils.h
│       └── ops.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
1 | # D3-Human: Dynamic Disentangled Digital Human from Monocular Video
2 |
3 | PyTorch implementation of the paper "D3-Human: Dynamic Disentangled Digital Human from Monocular Video". This repository contains the reconstruction code and data.
4 |
5 | **|[Project Page](https://ustc3dv.github.io/D3Human/)|** **|[Paper](https://arxiv.org/html/2501.01589v1)|**
6 |
7 | This method can reconstruct disentangled garment and body geometry from monocular videos.
8 |
9 | ## Pipeline
10 | Given a monocular video of a clothed human, D3-Human reconstructs the garment and the body as separate, disentangled geometry.
11 | The figure below illustrates the overall reconstruction pipeline.
12 | 
13 |
14 | 
15 |
16 | ## Setup
17 |
18 | This code has been tested on a Tesla V100 GPU.
19 |
20 | Environment:
21 | * Ubuntu 20.04
22 | * python 3.8.19
23 |
24 | Run the following:
25 | ```
26 | pip install ninja imageio PyOpenGL glfw xatlas gdown
27 | pip install git+https://github.com/NVlabs/nvdiffrast/
28 | pip install --global-option="--no-networks" git+https://github.com/NVlabs/tiny-cuda-nn#subdirectory=bindings/torch
29 | ```
30 |
31 | Download the female SMPL-X model from https://smpl-x.is.tue.mpg.de/ and place it in the `./smplx` folder.
32 |
33 | Download the preprocessed data from [here](https://drive.google.com/drive/folders/1-OY5X7pnt45XBMURVTM55xhOrKKUi7BX?usp=sharing) and place it in the `./data` folder.
34 |
35 | ## Reconstruction
36 |
37 | Run the following command to reconstruct:
38 |
39 | ```
40 | CUDA_VISIBLE_DEVICES=0 python train.py -o res/f3c --folder_name female-3-casual --config configs/f3c.json
41 | ```
42 |
43 |
44 | ## Dataset Preparation
45 | If you wish to reconstruct your own monocular video, you can use [ExAvatar](https://github.com/mks0601/ExAvatar_RELEASE) to obtain SMPL-X coefficients and camera parameters, [Sapiens](https://github.com/facebookresearch/sapiens) to obtain normals, and [SAM2](https://github.com/facebookresearch/sam2) to obtain masks for garments, the body, and the fully clothed human.
46 |
47 | ## Notes
48 | If MeshLab cannot be executed from the command line, you can manually perform remeshing and watertight processing within the software.
49 |
50 | ## Citation
51 |
52 | If you find our paper useful for your work, please cite:
53 |
54 | ```
55 | @inproceedings{Chen2024D3human,
56 |   author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
57 |   title     = {D$^3$-Human: Dynamic Disentangled Digital Human from Monocular Video},
58 |   booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
59 |   year      = {2025}
60 | }
61 | ```
62 |
63 | ## Contact
64 | For more questions, please contact honghuc@mail.ustc.edu.cn
65 |
66 | ## Acknowledgement
67 |
68 | Our data is processed with the help of [G-Shell](https://github.com/lzzcd001/GShell):
69 | ```
70 | @inproceedings{Liu2024gshell,
71 | title={Ghost on the Shell: An Expressive Representation of General 3D Shapes},
72 | author={Liu, Zhen and Feng, Yao and Xiu, Yuliang and Liu, Weiyang
73 | and Paull, Liam and Black, Michael J and Sch{\"o}lkopf, Bernhard},
74 | booktitle={ICLR},
75 | year={2024}
76 | }
77 |
78 | ```
79 |
--------------------------------------------------------------------------------
/checkpoints/init_deform_deform_cond_pe8.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/checkpoints/init_deform_deform_cond_pe8.pth
--------------------------------------------------------------------------------
/checkpoints/script/midpoint.mlx:
--------------------------------------------------------------------------------
[MeshLab .mlx filter script: XML content was not preserved in this export]
--------------------------------------------------------------------------------
/checkpoints/script/midpoint_head.mlx:
--------------------------------------------------------------------------------
[MeshLab .mlx filter script: XML content was not preserved in this export]
--------------------------------------------------------------------------------
/checkpoints/script/remesh.mlx:
--------------------------------------------------------------------------------
[MeshLab .mlx filter script: XML content was not preserved in this export]
--------------------------------------------------------------------------------
/checkpoints/script/remesh_bac.mlx:
--------------------------------------------------------------------------------
[MeshLab .mlx filter script: XML content was not preserved in this export]
--------------------------------------------------------------------------------
/checkpoints/script/wt.mlx:
--------------------------------------------------------------------------------
[MeshLab .mlx filter script: XML content was not preserved in this export]
--------------------------------------------------------------------------------
/configs/f3c.json:
--------------------------------------------------------------------------------
1 | {
2 | "ref_mesh": "data/spot/spot.obj",
3 | "random_textures": true,
4 | "iter": 10000,
5 | "save_interval": 100,
6 | "save_interval_fine": 100,
7 | "texture_res": [ 1080, 1080 ],
8 | "train_res": [1080, 1080],
9 | "batch": 1,
10 | "learning_rate": [0.03, 0.005],
11 | "ks_min" : [0, 0.001, 0.0],
12 | "ks_max" : [0, 1.0, 1.0],
13 | "lock_pos" : false,
14 | "display": [{"latlong" : true}],
15 | "background" : "white",
16 | "denoiser": "bilateral",
17 | "n_samples" : 24,
18 | "env_scale" : 2.0,
19 | "gshell_grid" : 128,
20 | "validate" : true,
21 | "laplace_scale" : 6000,
22 | "boxscale": [1, 1, 1],
23 | "aabb": [-1, -1, -1, 1, 1, 1]
24 | }
25 |
--------------------------------------------------------------------------------
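The config above is plain JSON consumed by `train.py` (see the Reconstruction command in the README). A minimal sketch of reading it, assuming nothing beyond standard JSON parsing:

```
import json

# Load the f3c reconstruction settings
with open("configs/f3c.json") as f:
    cfg = json.load(f)

print(cfg["train_res"], cfg["iter"], cfg["gshell_grid"])  # [1080, 1080] 10000 128
```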
/dataset/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/dataset_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset_split.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from .body_models import (
18 | create,
19 | SMPL,
20 | SMPLH,
21 | SMPLX,
22 | MANO,
23 | FLAME,
24 | build_layer,
25 | SMPLLayer,
26 | SMPLHLayer,
27 | SMPLXLayer,
28 | MANOLayer,
29 | FLAMELayer,
30 | )
31 |
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/joint_names.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | import numpy as np
18 |
19 | JOINT_NAMES = [
20 | "pelvis",
21 | "left_hip",
22 | "right_hip",
23 | "spine1",
24 | "left_knee",
25 | "right_knee",
26 | "spine2",
27 | "left_ankle",
28 | "right_ankle",
29 | "spine3",
30 | "left_foot",
31 | "right_foot",
32 | "neck",
33 | "left_collar",
34 | "right_collar",
35 | "head",
36 | "left_shoulder",
37 | "right_shoulder",
38 | "left_elbow",
39 | "right_elbow",
40 | "left_wrist",
41 | "right_wrist",
42 | "jaw",
43 | "left_eye_smplhf",
44 | "right_eye_smplhf",
45 | "left_index1",
46 | "left_index2",
47 | "left_index3",
48 | "left_middle1",
49 | "left_middle2",
50 | "left_middle3",
51 | "left_pinky1",
52 | "left_pinky2",
53 | "left_pinky3",
54 | "left_ring1",
55 | "left_ring2",
56 | "left_ring3",
57 | "left_thumb1",
58 | "left_thumb2",
59 | "left_thumb3",
60 | "right_index1",
61 | "right_index2",
62 | "right_index3",
63 | "right_middle1",
64 | "right_middle2",
65 | "right_middle3",
66 | "right_pinky1",
67 | "right_pinky2",
68 | "right_pinky3",
69 | "right_ring1",
70 | "right_ring2",
71 | "right_ring3",
72 | "right_thumb1",
73 | "right_thumb2",
74 | "right_thumb3",
75 | "nose",
76 | "right_eye",
77 | "left_eye",
78 | "right_ear",
79 | "left_ear",
80 | "left_big_toe",
81 | "left_small_toe",
82 | "left_heel",
83 | "right_big_toe",
84 | "right_small_toe",
85 | "right_heel",
86 | "left_thumb",
87 | "left_index",
88 | "left_middle",
89 | "left_ring",
90 | "left_pinky",
91 | "right_thumb",
92 | "right_index",
93 | "right_middle",
94 | "right_ring",
95 | "right_pinky",
96 | "right_eye_brow1",
97 | "right_eye_brow2",
98 | "right_eye_brow3",
99 | "right_eye_brow4",
100 | "right_eye_brow5",
101 | "left_eye_brow5",
102 | "left_eye_brow4",
103 | "left_eye_brow3",
104 | "left_eye_brow2",
105 | "left_eye_brow1",
106 | "nose1",
107 | "nose2",
108 | "nose3",
109 | "nose4",
110 | "right_nose_2",
111 | "right_nose_1",
112 | "nose_middle",
113 | "left_nose_1",
114 | "left_nose_2",
115 | "right_eye1",
116 | "right_eye2",
117 | "right_eye3",
118 | "right_eye4",
119 | "right_eye5",
120 | "right_eye6",
121 | "left_eye4",
122 | "left_eye3",
123 | "left_eye2",
124 | "left_eye1",
125 | "left_eye6",
126 | "left_eye5",
127 | "right_mouth_1",
128 | "right_mouth_2",
129 | "right_mouth_3",
130 | "mouth_top",
131 | "left_mouth_3",
132 | "left_mouth_2",
133 | "left_mouth_1",
134 | "left_mouth_5", # 59 in OpenPose output
135 | "left_mouth_4", # 58 in OpenPose output
136 | "mouth_bottom",
137 | "right_mouth_4",
138 | "right_mouth_5",
139 | "right_lip_1",
140 | "right_lip_2",
141 | "lip_top",
142 | "left_lip_2",
143 | "left_lip_1",
144 | "left_lip_3",
145 | "lip_bottom",
146 | "right_lip_3",
147 | # Face contour
148 | "right_contour_1",
149 | "right_contour_2",
150 | "right_contour_3",
151 | "right_contour_4",
152 | "right_contour_5",
153 | "right_contour_6",
154 | "right_contour_7",
155 | "right_contour_8",
156 | "contour_middle",
157 | "left_contour_8",
158 | "left_contour_7",
159 | "left_contour_6",
160 | "left_contour_5",
161 | "left_contour_4",
162 | "left_contour_3",
163 | "left_contour_2",
164 | "left_contour_1",
165 | ]
166 |
167 |
168 | SMPLH_JOINT_NAMES = [
169 | "pelvis",
170 | "left_hip",
171 | "right_hip",
172 | "spine1",
173 | "left_knee",
174 | "right_knee",
175 | "spine2",
176 | "left_ankle",
177 | "right_ankle",
178 | "spine3",
179 | "left_foot",
180 | "right_foot",
181 | "neck",
182 | "left_collar",
183 | "right_collar",
184 | "head",
185 | "left_shoulder",
186 | "right_shoulder",
187 | "left_elbow",
188 | "right_elbow",
189 | "left_wrist",
190 | "right_wrist",
191 | "left_index1",
192 | "left_index2",
193 | "left_index3",
194 | "left_middle1",
195 | "left_middle2",
196 | "left_middle3",
197 | "left_pinky1",
198 | "left_pinky2",
199 | "left_pinky3",
200 | "left_ring1",
201 | "left_ring2",
202 | "left_ring3",
203 | "left_thumb1",
204 | "left_thumb2",
205 | "left_thumb3",
206 | "right_index1",
207 | "right_index2",
208 | "right_index3",
209 | "right_middle1",
210 | "right_middle2",
211 | "right_middle3",
212 | "right_pinky1",
213 | "right_pinky2",
214 | "right_pinky3",
215 | "right_ring1",
216 | "right_ring2",
217 | "right_ring3",
218 | "right_thumb1",
219 | "right_thumb2",
220 | "right_thumb3",
221 | "nose",
222 | "right_eye",
223 | "left_eye",
224 | "right_ear",
225 | "left_ear",
226 | "left_big_toe",
227 | "left_small_toe",
228 | "left_heel",
229 | "right_big_toe",
230 | "right_small_toe",
231 | "right_heel",
232 | "left_thumb",
233 | "left_index",
234 | "left_middle",
235 | "left_ring",
236 | "left_pinky",
237 | "right_thumb",
238 | "right_index",
239 | "right_middle",
240 | "right_ring",
241 | "right_pinky",
242 | ]
243 |
244 | SMPL_JOINT_NAMES = [
245 | "pelvis",
246 | "left_hip",
247 | "right_hip",
248 | "spine1",
249 | "left_knee",
250 | "right_knee",
251 | "spine2",
252 | "left_ankle",
253 | "right_ankle",
254 | "spine3",
255 | "left_foot",
256 | "right_foot",
257 | "neck",
258 | "left_collar",
259 | "right_collar",
260 | "head",
261 | "left_shoulder",
262 | "right_shoulder",
263 | "left_elbow",
264 | "right_elbow",
265 | "left_wrist",
266 | "right_wrist",
267 | "left_hand",
268 | "right_hand",
269 | ]
270 |
271 |
272 | class Body:
273 | """
274 | Class for storing a single body pose.
275 | """
276 |
277 | def __init__(self, joints, joint_names):
278 | assert joints.ndim > 1
279 | assert joints.shape[0] == len(joint_names)
280 | self.joints = {}
281 | for i, j in enumerate(joint_names):
282 | self.joints[j] = joints[i]
283 |
284 | @staticmethod
285 | def from_smpl(joints):
286 | """
287 | Create a Body object from SMPL joints.
288 | """
289 | return Body(joints, SMPL_JOINT_NAMES)
290 |
291 | @staticmethod
292 | def from_smplh(joints):
293 | """
294 | Create a Body object from SMPLH joints.
295 | """
296 | return Body(joints, SMPLH_JOINT_NAMES)
297 |
298 | def _as(self, joint_names):
299 | """
300 | Return a Body object with the specified joint names.
301 | """
302 | joint_list = []
303 | for j in joint_names:
304 | if j not in self.joints:
305 | joint_list.append(np.zeros_like(self.joints["spine1"]))
306 | else:
307 | joint_list.append(self.joints[j])
308 | return np.stack(joint_list, axis=0)
309 |
310 | def as_smpl(self):
311 | """
312 | Convert the body to SMPL joints.
313 | """
314 | return self._as(SMPL_JOINT_NAMES)
315 |
316 | def as_smplh(self):
317 | """
318 | Convert the body to SMPLH joints.
319 | """
320 | return self._as(SMPLH_JOINT_NAMES)
321 |
--------------------------------------------------------------------------------
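A usage sketch for the `Body` helper above (hypothetical, not part of the repo): it re-orders SMPL joints into the SMPL-H layout, zero-filling joints that SMPL does not define.

```
import numpy as np
from deform.smplx_exavatar.joint_names import Body, SMPL_JOINT_NAMES, SMPLH_JOINT_NAMES

joints = np.random.rand(len(SMPL_JOINT_NAMES), 3)  # (24, 3) SMPL joints
body = Body.from_smpl(joints)
smplh_joints = body.as_smplh()                     # (73, 3); missing joints are zero-filled
assert smplh_joints.shape == (len(SMPLH_JOINT_NAMES), 3)
```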
/deform/smplx_exavatar/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from typing import NewType, Union, Optional
18 | from dataclasses import dataclass, asdict, fields
19 | import numpy as np
20 | import torch
21 |
22 | Tensor = NewType('Tensor', torch.Tensor)
23 | Array = NewType('Array', np.ndarray)
24 |
25 |
26 | @dataclass
27 | class ModelOutput:
28 | vertices: Optional[Tensor] = None
29 | joints: Optional[Tensor] = None
30 | full_pose: Optional[Tensor] = None
31 | global_orient: Optional[Tensor] = None
32 | transl: Optional[Tensor] = None
33 | v_shaped: Optional[Tensor] = None
34 |
35 | def __getitem__(self, key):
36 | return getattr(self, key)
37 |
38 | def get(self, key, default=None):
39 | return getattr(self, key, default)
40 |
41 | def __iter__(self):
42 | return self.keys()
43 |
44 | def keys(self):
45 | keys = [t.name for t in fields(self)]
46 | return iter(keys)
47 |
48 | def values(self):
49 | values = [getattr(self, t.name) for t in fields(self)]
50 | return iter(values)
51 |
52 | def items(self):
53 | data = [(t.name, getattr(self, t.name)) for t in fields(self)]
54 | return iter(data)
55 |
56 |
57 | @dataclass
58 | class SMPLOutput(ModelOutput):
59 | betas: Optional[Tensor] = None
60 | body_pose: Optional[Tensor] = None
61 |
62 |
63 | @dataclass
64 | class SMPLHOutput(SMPLOutput):
65 | left_hand_pose: Optional[Tensor] = None
66 | right_hand_pose: Optional[Tensor] = None
67 | transl: Optional[Tensor] = None
68 |
69 |
70 | @dataclass
71 | class SMPLXOutput(SMPLHOutput):
72 | expression: Optional[Tensor] = None
73 | jaw_pose: Optional[Tensor] = None
74 |
75 |
76 | @dataclass
77 | class MANOOutput(ModelOutput):
78 | betas: Optional[Tensor] = None
79 | hand_pose: Optional[Tensor] = None
80 |
81 |
82 | @dataclass
83 | class FLAMEOutput(ModelOutput):
84 | betas: Optional[Tensor] = None
85 | expression: Optional[Tensor] = None
86 | jaw_pose: Optional[Tensor] = None
87 | neck_pose: Optional[Tensor] = None
88 |
89 |
90 | def find_joint_kin_chain(joint_id, kinematic_tree):
91 | kin_chain = []
92 | curr_idx = joint_id
93 | while curr_idx != -1:
94 | kin_chain.append(curr_idx)
95 | curr_idx = kinematic_tree[curr_idx]
96 | return kin_chain
97 |
98 |
99 | def to_tensor(
100 | array: Union[Array, Tensor], dtype=torch.float32
101 | ) -> Tensor:
102 | if torch.is_tensor(array):
103 | return array
104 | else:
105 | return torch.tensor(array, dtype=dtype)
106 |
107 |
108 | class Struct(object):
109 | def __init__(self, **kwargs):
110 | for key, val in kwargs.items():
111 | setattr(self, key, val)
112 |
113 |
114 | def to_np(array, dtype=np.float32):
115 | if 'scipy.sparse' in str(type(array)):
116 | array = array.todense()
117 | return np.array(array, dtype=dtype)
118 |
119 |
120 | def rot_mat_to_euler(rot_mats):
121 |     # Computes Euler angles from rotation matrices.
122 |     # Careful with extreme cases of Euler angles like [0.0, pi, 0.0].
123 |
124 | sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] +
125 | rot_mats[:, 1, 0] * rot_mats[:, 1, 0])
126 | return torch.atan2(-rot_mats[:, 2, 0], sy)
127 |
--------------------------------------------------------------------------------
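A small sketch (assumed usage, not from the repo) showing that `ModelOutput` and its subclasses support both attribute and dict-style access:

```
import torch
from deform.smplx_exavatar.utils import SMPLXOutput

out = SMPLXOutput(vertices=torch.zeros(1, 10475, 3))  # SMPL-X has 10475 vertices
print(out.vertices.shape)      # attribute access
print(out["vertices"].shape)   # dict-style access via __getitem__
print(out.get("expression"))   # None: field was never set
```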
/deform/smplx_exavatar/vertex_ids.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from __future__ import print_function
18 | from __future__ import absolute_import
19 | from __future__ import division
20 |
21 | # Joint name to vertex mapping. SMPL/SMPL-H/SMPL-X vertices that correspond to
22 | # MSCOCO and OpenPose joints
23 | vertex_ids = {
24 | 'smplh': {
25 | 'nose': 332,
26 | 'reye': 6260,
27 | 'leye': 2800,
28 | 'rear': 4071,
29 | 'lear': 583,
30 | 'rthumb': 6191,
31 | 'rindex': 5782,
32 | 'rmiddle': 5905,
33 | 'rring': 6016,
34 | 'rpinky': 6133,
35 | 'lthumb': 2746,
36 | 'lindex': 2319,
37 | 'lmiddle': 2445,
38 | 'lring': 2556,
39 | 'lpinky': 2673,
40 | 'LBigToe': 3216,
41 | 'LSmallToe': 3226,
42 | 'LHeel': 3387,
43 | 'RBigToe': 6617,
44 | 'RSmallToe': 6624,
45 | 'RHeel': 6787
46 | },
47 | 'smplx': {
48 | 'nose': 9120,
49 | 'reye': 9929,
50 | 'leye': 9448,
51 | 'rear': 616,
52 | 'lear': 6,
53 | 'rthumb': 8079,
54 | 'rindex': 7669,
55 | 'rmiddle': 7794,
56 | 'rring': 7905,
57 | 'rpinky': 8022,
58 | 'lthumb': 5361,
59 | 'lindex': 4933,
60 | 'lmiddle': 5058,
61 | 'lring': 5169,
62 | 'lpinky': 5286,
63 | 'LBigToe': 5770,
64 | 'LSmallToe': 5780,
65 | 'LHeel': 8846,
66 | 'RBigToe': 8463,
67 | 'RSmallToe': 8474,
68 | 'RHeel': 8635
69 | },
70 | 'mano': {
71 | 'thumb': 744,
72 | 'index': 320,
73 | 'middle': 443,
74 | 'ring': 554,
75 | 'pinky': 671,
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
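As an illustration (a hypothetical snippet, not in the repo), the table above can be used to read keypoint positions directly off a SMPL-X mesh:

```
import torch
from deform.smplx_exavatar.vertex_ids import vertex_ids

verts = torch.rand(10475, 3)  # SMPL-X template has 10475 vertices
tip_names = ["lthumb", "lindex", "lmiddle", "lring", "lpinky"]
left_tips = verts[[vertex_ids["smplx"][n] for n in tip_names]]  # (5, 3)
```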
/deform/smplx_exavatar/vertex_joint_selector.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems. All rights reserved.
14 | #
15 | # Contact: ps-license@tuebingen.mpg.de
16 |
17 | from __future__ import absolute_import
18 | from __future__ import print_function
19 | from __future__ import division
20 |
21 | import numpy as np
22 |
23 | import torch
24 | import torch.nn as nn
25 |
26 | from .utils import to_tensor
27 |
28 |
29 | class VertexJointSelector(nn.Module):
30 |
31 | def __init__(self, vertex_ids=None,
32 | use_hands=True,
33 | use_feet_keypoints=True, **kwargs):
34 | super(VertexJointSelector, self).__init__()
35 |
36 | extra_joints_idxs = []
37 |
38 | face_keyp_idxs = np.array([
39 | vertex_ids['nose'],
40 | vertex_ids['reye'],
41 | vertex_ids['leye'],
42 | vertex_ids['rear'],
43 | vertex_ids['lear']], dtype=np.int64)
44 |
45 | extra_joints_idxs = np.concatenate([extra_joints_idxs,
46 | face_keyp_idxs])
47 |
48 | if use_feet_keypoints:
49 | feet_keyp_idxs = np.array([vertex_ids['LBigToe'],
50 | vertex_ids['LSmallToe'],
51 | vertex_ids['LHeel'],
52 | vertex_ids['RBigToe'],
53 | vertex_ids['RSmallToe'],
54 | vertex_ids['RHeel']], dtype=np.int32)
55 |
56 | extra_joints_idxs = np.concatenate(
57 | [extra_joints_idxs, feet_keyp_idxs])
58 |
59 | if use_hands:
60 | self.tip_names = ['thumb', 'index', 'middle', 'ring', 'pinky']
61 |
62 | tips_idxs = []
63 | for hand_id in ['l', 'r']:
64 | for tip_name in self.tip_names:
65 | tips_idxs.append(vertex_ids[hand_id + tip_name])
66 |
67 | extra_joints_idxs = np.concatenate(
68 | [extra_joints_idxs, tips_idxs])
69 |
70 | self.register_buffer('extra_joints_idxs',
71 | to_tensor(extra_joints_idxs, dtype=torch.long))
72 |
73 | def forward(self, vertices, joints):
74 |         # The '.to(torch.long)' cast is added so the traced module also works in C++;
75 |         # otherwise index_select() raises a runtime error:
76 |         # 'index_select(): Expected dtype int32 or int64 for index'
77 |         extra_joints = torch.index_select(vertices, 1, self.extra_joints_idxs.to(torch.long))
78 | joints = torch.cat([joints, extra_joints], dim=1)
79 |
80 | return joints
81 |
--------------------------------------------------------------------------------
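A usage sketch (hypothetical, not part of the repo): with the SMPL-X table from `vertex_ids.py`, the selector appends 21 surface keypoints (5 face, 6 feet, 10 fingertips) to the regressed skeleton joints.

```
import torch
from deform.smplx_exavatar.vertex_ids import vertex_ids
from deform.smplx_exavatar.vertex_joint_selector import VertexJointSelector

selector = VertexJointSelector(vertex_ids["smplx"])
vertices = torch.rand(1, 10475, 3)  # batch of SMPL-X meshes
joints = torch.rand(1, 55, 3)       # 55 skeleton joints
joints = selector(vertices, joints)
print(joints.shape)                 # torch.Size([1, 76, 3])
```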
/denoiser/__pycache__/denoiser.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/denoiser/__pycache__/denoiser.cpython-38.pyc
--------------------------------------------------------------------------------
/denoiser/denoiser.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | import numpy as np
5 | import math
6 |
7 | from render import util
8 | if "TWOSIDED_TEXTURE" not in os.environ or os.environ["TWOSIDED_TEXTURE"] == "True":
9 | from render import optixutils as ou
10 | else:
11 | from render import optixutils_single_sided as ou
12 |
13 |
14 | ###############################################################################
15 | # Bilateral denoiser
16 | #
17 | # Loosely based on SVGF, but removing temporal components and variance stopping guides.
18 | # https://research.nvidia.com/publication/2017-07_spatiotemporal-variance-guided-filtering-real-time-reconstruction-path-traced
19 | ###############################################################################
20 |
21 | class BilateralDenoiser(torch.nn.Module):
22 | def __init__(self, influence=1.0):
23 | super(BilateralDenoiser, self).__init__()
24 | self.set_influence(influence)
25 |
26 | def set_influence(self, factor):
27 | self.sigma = max(factor * 2, 0.0001)
28 | self.variance = self.sigma**2.
29 | self.N = 2 * math.ceil(self.sigma * 2.5) + 1
30 |
31 | def forward(self, input):
32 | col = input[..., 0:3]
33 | nrm = util.safe_normalize(input[..., 3:6]) # Bent normals can produce normals of length < 1 here
34 | zdz = input[..., 6:8]
35 | return ou.bilateral_denoiser(col, nrm, zdz, self.sigma)
36 |
--------------------------------------------------------------------------------
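A minimal sketch of driving the denoiser (assumptions: a CUDA build of the `optixutils` plugin is available, and the input is an [N, H, W, 8] buffer laid out as color, normal, depth and depth-derivative, matching the slicing in `forward`):

```
import torch
from denoiser.denoiser import BilateralDenoiser

den = BilateralDenoiser(influence=1.0)           # sigma = 2.0, 11-tap kernel
buf = torch.rand(1, 256, 256, 8, device="cuda")  # rgb | nrm | z, dz
rgb = den(buf)                                   # bilaterally filtered color
```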
/figs/pipe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/figs/pipe.jpg
--------------------------------------------------------------------------------
/geometry/__pycache__/embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/embedding.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/gshell_tets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/gshell_tets.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/mlp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/mlp.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/embedding.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | class Embedding(nn.Module):
5 | def __init__(self, in_channels, N_freqs, logscale=True):
6 | """
7 | Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
8 | in_channels: number of input channels (3 for both xyz and direction)
9 | """
10 | super(Embedding, self).__init__()
11 | self.N_freqs = N_freqs
12 | self.in_channels = in_channels
13 | self.funcs = [torch.sin, torch.cos]
14 | self.out_channels = in_channels*(len(self.funcs)*N_freqs+1)
15 |
16 | if logscale:
17 | self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs)
18 | else:
19 | self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs)
20 |
21 | def forward(self, x):
22 | """
23 | Embeds x to (x, sin(2^k x), cos(2^k x), ...)
24 | Different from the paper, "x" is also in the output
25 | See https://github.com/bmild/nerf/issues/12
26 |
27 | Inputs:
28 | x: (B, self.in_channels)
29 |
30 | Outputs:
31 | out: (B, self.out_channels)
32 | """
33 | out = [x]
34 | for freq in self.freq_bands:
35 | for func in self.funcs:
36 | out += [func(freq*x)]
37 |
38 | return torch.cat(out, -1)
39 |
40 |
--------------------------------------------------------------------------------
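A quick sketch (not in the repo) of the encoding above; the `pe8` in the checkpoint name suggests `N_freqs=8`, which maps 3 input channels to 3*(2*8+1) = 51 output channels:

```
import torch
from geometry.embedding import Embedding

emb = Embedding(in_channels=3, N_freqs=8)
x = torch.rand(1024, 3)
y = emb(x)
print(emb.out_channels, y.shape)  # 51 torch.Size([1024, 51])
```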
/lap_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def compute_body_laplacian(self):
5 | edges_packed = self._body_edges.clone().detach()
6 | body_verts = self.get_xyz[:self.body_verts_num]
7 | V = body_verts.shape[0]
8 |
9 | e0, e1 = edges_packed.unbind(1)
10 |
11 | idx01 = torch.stack([e0, e1], dim=1) # (sum(E_n), 2)
12 | idx10 = torch.stack([e1, e0], dim=1) # (sum(E_n), 2)
13 | idx = torch.cat([idx01, idx10], dim=0).t() # (2, 2*sum(E_n))
14 |
15 | # First, we construct the adjacency matrix,
16 | # i.e. A[i, j] = 1 if (i,j) is an edge, or
17 | # A[e0, e1] = 1 & A[e1, e0] = 1
18 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
19 | A = torch.sparse.FloatTensor(idx, ones, (V, V))
20 |
21 | # the sum of i-th row of A gives the degree of the i-th vertex
22 | deg = torch.sparse.sum(A, dim=1).to_dense()
23 |
24 |     # We construct the Laplacian matrix by adding the non-diagonal values
25 | # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge
26 | deg0 = deg[e0]
27 | deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0)
28 | deg1 = deg[e1]
29 | deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1)
30 | val = torch.cat([deg0, deg1])
31 | L = torch.sparse.FloatTensor(idx, val, (V, V))
32 |
33 | # Then we add the diagonal values L[i, i] = -1.
34 | idx = torch.arange(V, device=self._xyz.device)
35 | idx = torch.stack([idx, idx], dim=0)
36 | ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
37 | L -= torch.sparse.FloatTensor(idx, ones, (V, V))
38 | self.body_laplacian = L
39 |
40 | def body_laplacian_loss(mesh):
41 |
42 | L = mesh.laplacian
43 | V = mesh.v_pos
44 |
45 | loss = L.mm(V)
46 | loss = loss.norm(dim=1)**2
47 | return loss.mean()
48 |
49 |
50 | def body_normal_loss(mesh):
51 |
52 |
53 | # loss = 1 - torch.cosine_similarity(mesh.face_normals[mesh.connected_faces[:, 0]], mesh.face_normals[mesh.connected_faces[:, 1]], dim=1)
54 |
55 | return mesh.normal_consistency()
56 |
57 | def find_edges(indices, remove_duplicates=True):
58 | # Extract the three edges (in terms of vertex indices) for each face
59 | # edges_0 = [f0_e0, ..., fN_e0]
60 | # edges_1 = [f0_e1, ..., fN_e1]
61 | # edges_2 = [f0_e2, ..., fN_e2]
62 | edges_0 = torch.index_select(indices, 1, torch.tensor([0,1], device=indices.device))
63 | edges_1 = torch.index_select(indices, 1, torch.tensor([1,2], device=indices.device))
64 | edges_2 = torch.index_select(indices, 1, torch.tensor([2,0], device=indices.device))
65 |
66 |     # Merge them into one tensor so that the three edges of one face appear sequentially
67 | # edges = [f0_e0, f0_e1, f0_e2, ..., fN_e0, fN_e1, fN_e2]
68 | edges = torch.cat([edges_0, edges_1, edges_2], dim=1).view(indices.shape[0] * 3, -1)
69 |
70 | if remove_duplicates:
71 | edges, _ = torch.sort(edges, dim=1)
72 | edges = torch.unique(edges, dim=0)
73 |
74 | return edges
75 |
76 | def find_connected_faces(indices):
77 | edges = find_edges(indices, remove_duplicates=False)
78 |
79 | # Make sure that two edges that share the same vertices have the vertex ids appear in the same order
80 | edges, _ = torch.sort(edges, dim=1)
81 |
82 | # Now find edges that share the same vertices and make sure there are only manifold edges
83 | _, inverse_indices, counts = torch.unique(edges, dim=0, sorted=False, return_inverse=True, return_counts=True)
84 |
85 | # print("counts.max():", counts.max())
86 | assert counts.max() == 2
87 |
88 | # We now create a tensor that contains corresponding faces.
89 | # If the faces with ids fi and fj share the same edge, the tensor contains them as
90 | # [..., [fi, fj], ...]
91 | face_ids = torch.arange(indices.shape[0])
92 | face_ids = torch.repeat_interleave(face_ids, 3, dim=0) # Tensor with the face id for each edge
93 |
94 | face_correspondences = torch.zeros((counts.shape[0], 2), dtype=torch.int64)
95 | face_correspondences_indices = torch.zeros(counts.shape[0], dtype=torch.int64)
96 |
97 | # ei = edge index
98 | for ei, ei_unique in enumerate(list(inverse_indices.cpu().numpy())):
99 | face_correspondences[ei_unique, face_correspondences_indices[ei_unique]] = face_ids[ei]
100 | face_correspondences_indices[ei_unique] += 1
101 |
102 | face_correspondences = face_correspondences.cuda()
103 |
104 | return face_correspondences[counts == 2].to(device=indices.device), edges
105 |
--------------------------------------------------------------------------------
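A worked micro-example (hypothetical) of the edge extraction above: for a single triangle, `find_edges` returns its three undirected edges, sorted and de-duplicated.

```
import torch
from lap_loss import find_edges

faces = torch.tensor([[0, 1, 2]])
print(find_edges(faces))
# tensor([[0, 1],
#         [0, 2],
#         [1, 2]])
```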
/render/__pycache__/light.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/light.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/material.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/material.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mesh.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mesh.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mlptexture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mlptexture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/obj.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/obj.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/regularizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/regularizer.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render_mask.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render_mask.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/texture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/texture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/render/light.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import os
10 | import numpy as np
11 | import torch
12 | import nvdiffrast.torch as dr
13 |
14 | from . import util
15 | from . import renderutils as ru
16 |
17 | ######################################################################################
18 | # Monte-carlo sampled environment light with PDF / CDF computation
19 | ######################################################################################
20 |
21 | class EnvironmentLight:
22 | LIGHT_MIN_RES = 16
23 |
24 | MIN_ROUGHNESS = 0.08
25 | MAX_ROUGHNESS = 0.5
26 |
27 | def __init__(self, base):
28 | self.mtx = None
29 | self.base = base
30 |
31 | self.pdf_scale = (self.base.shape[0] * self.base.shape[1]) / (2 * np.pi * np.pi)
32 | self.update_pdf()
33 |
34 | def xfm(self, mtx):
35 | self.mtx = mtx
36 |
37 | def parameters(self):
38 | return [self.base]
39 |
40 | def clone(self):
41 | return EnvironmentLight(self.base.clone().detach())
42 |
43 | def clamp_(self, min=None, max=None):
44 | self.base.clamp_(min, max)
45 |
46 | def update_pdf(self):
47 | with torch.no_grad():
48 | # Compute PDF
49 | Y = util.pixel_grid(self.base.shape[1], self.base.shape[0])[..., 1]
50 | self._pdf = torch.max(self.base, dim=-1)[0] * torch.sin(Y * np.pi) # Scale by sin(theta) for lat-long, https://cs184.eecs.berkeley.edu/sp18/article/25
51 | self._pdf = self._pdf / torch.sum(self._pdf)
52 |
53 | # Compute cumulative sums over the columns and rows
54 | self.cols = torch.cumsum(self._pdf, dim=1)
55 | self.rows = torch.cumsum(self.cols[:, -1:].repeat([1, self.cols.shape[1]]), dim=0)
56 |
57 | # Normalize
58 | self.cols = self.cols / torch.where(self.cols[:, -1:] > 0, self.cols[:, -1:], torch.ones_like(self.cols))
59 | self.rows = self.rows / torch.where(self.rows[-1:, :] > 0, self.rows[-1:, :], torch.ones_like(self.rows))
60 |
61 | @torch.no_grad()
62 | def generate_image(self, res):
63 | texcoord = util.pixel_grid(res[1], res[0])
64 | return dr.texture(self.base[None, ...].contiguous(), texcoord[None, ...].contiguous(), filter_mode='linear')[0]
65 |
66 | ######################################################################################
67 | # Load and store
68 | ######################################################################################
69 |
70 | @torch.no_grad()
71 | def _load_env_hdr(fn, scale=1.0, res=None, trainable=False):
72 | latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')*scale
73 |
74 | if res is not None:
75 | texcoord = util.pixel_grid(res[1], res[0])
76 | latlong_img = torch.clamp(dr.texture(latlong_img[None, ...], texcoord[None, ...], filter_mode='linear')[0], min=0.0001)
77 |
78 | print("EnvProbe,", latlong_img.shape, ", min/max", torch.min(latlong_img).item(), torch.max(latlong_img).item())
79 | if trainable:
80 | print("trainable light loaded")
81 | return EnvironmentLight(base=latlong_img.clone().detach().requires_grad_(True))
82 | else:
83 | return EnvironmentLight(base=latlong_img)
84 |
85 | @torch.no_grad()
86 | def load_env(fn, scale=1.0, res=None, trainable=False):
87 | if os.path.splitext(fn)[1].lower() == ".hdr":
88 | return _load_env_hdr(fn, scale, res, trainable=trainable)
89 | else:
90 | assert False, "Unknown envlight extension %s" % os.path.splitext(fn)[1]
91 |
92 | @torch.no_grad()
93 | def save_env_map(fn, light):
94 | assert isinstance(light, EnvironmentLight)
95 | color = light.generate_image([512, 1024])
96 | util.save_image_raw(fn, color.detach().cpu().numpy())
97 |
98 | ######################################################################################
99 | # Create trainable with random initialization
100 | ######################################################################################
101 |
102 | def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
103 | base = torch.rand(base_res, base_res, 3, dtype=torch.float32, device='cuda') * scale + bias
104 | l = EnvironmentLight(base.clone().detach().requires_grad_(True))
105 | return l
106 |
--------------------------------------------------------------------------------
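A short sketch (assumed usage) of creating a trainable environment light and exposing its texels to an optimizer; `clamp_` and `update_pdf` keep the light physically valid between steps:

```
import torch
from render import light

env = light.create_trainable_env_rnd(base_res=512)  # random lat-long init on CUDA
opt = torch.optim.Adam(env.parameters(), lr=1e-2)
# per iteration, after loss.backward() and opt.step():
env.clamp_(min=0.0)
env.update_pdf()
```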
/render/material.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import os
10 | import numpy as np
11 | import torch
12 |
13 | from . import util
14 | from . import texture
15 | from . import mlptexture
16 |
17 | ######################################################################################
18 | # .mtl material format loading / storing
19 | ######################################################################################
20 |
21 | def load_mtl(fn, clear_ks=True):
22 | import re
23 | mtl_path = os.path.dirname(fn)
24 |
25 | # Read file
26 | with open(fn, 'r') as f:
27 | lines = f.readlines()
28 |
29 | # Parse materials
30 | materials = []
31 | for line in lines:
32 | split_line = re.split(' +|\t+|\n+', line.strip())
33 | prefix = split_line[0].lower()
34 | data = split_line[1:]
35 | if 'newmtl' in prefix:
36 | material = {'name' : data[0]}
37 | materials += [material]
38 | elif materials:
39 | if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
40 | material[prefix] = data[0]
41 | else:
42 | material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')
43 |
44 | # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps. So replace constants with 1x1 maps
45 | for mat in materials:
46 | if not 'bsdf' in mat:
47 | mat['bsdf'] = 'pbr'
48 |
49 | if 'map_kd' in mat:
50 | mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
51 | else:
52 | mat['kd'] = texture.Texture2D(mat['kd'])
53 |
54 | if 'map_ks' in mat:
55 | mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
56 | else:
57 | mat['ks'] = texture.Texture2D(mat['ks'])
58 |
59 | if 'bump' in mat:
60 | mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)
61 |
62 | # Convert Kd from sRGB to linear RGB
63 | mat['kd'] = texture.srgb_to_rgb(mat['kd'])
64 |
65 | if clear_ks:
66 | # Override ORM occlusion (red) channel by zeros. We hijack this channel
67 | for mip in mat['ks'].getMips():
68 | mip[..., 0] = 0.0
69 |
70 | return materials
71 |
72 | def save_mtl(fn, material):
73 | folder = os.path.dirname(fn)
74 | with open(fn, "w") as f:
75 | f.write('newmtl defaultMat\n')
76 | if material is not None:
77 | f.write('bsdf %s\n' % material['bsdf'])
78 | if 'kd' in material.keys():
79 | f.write('map_Kd texture_kd.png\n')
80 | texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
81 | if 'ks' in material.keys():
82 | f.write('map_Ks texture_ks.png\n')
83 | texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
84 | if 'normal' in material.keys():
85 | f.write('bump texture_n.png\n')
86 | texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(util.safe_normalize(x)+1)*0.5)
87 | else:
88 | f.write('Kd 1 1 1\n')
89 | f.write('Ks 0 0 0\n')
90 | f.write('Ka 0 0 0\n')
91 | f.write('Tf 1 1 1\n')
92 | f.write('Ni 1\n')
93 | f.write('Ns 0\n')
94 |
95 | ######################################################################################
96 | # Utility function to convert an existing material and make all textures trainable
97 | ######################################################################################
98 |
99 | def create_trainable(material):
100 | result = material.copy()
101 | for key, val in result.items():
102 | if isinstance(val, texture.Texture2D):
103 | result[key] = texture.create_trainable(val)
104 | return result
105 |
106 | def get_parameters(material):
107 | trainable = []
108 | for key, val in material.items():
109 | if isinstance(val, texture.Texture2D) or isinstance(val, mlptexture.MLPTexture3D):
110 | trainable += val.parameters()
111 | return trainable
112 |
113 | ######################################################################################
114 | # Merge multiple materials into a single uber-material
115 | ######################################################################################
116 |
117 | def _upscale_replicate(x, full_res):
118 | x = x.permute(0, 3, 1, 2)
119 | x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
120 | return x.permute(0, 2, 3, 1).contiguous()
121 |
122 | def merge_materials(materials, texcoords, tfaces, mfaces):
123 | assert len(materials) > 0
124 | for mat in materials:
125 | assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
126 | assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"
127 |
128 | uber_material = {
129 | 'name' : 'uber_material',
130 | 'bsdf' : materials[0]['bsdf'],
131 | }
132 |
133 | textures = ['kd', 'ks', 'normal']
134 |
135 | # Find maximum texture resolution across all materials and textures
136 | max_res = None
137 | for mat in materials:
138 | for tex in textures:
139 | tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
140 | max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res
141 |
142 |     # Compute size of compound texture and round up to nearest PoT
143 |     full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(np.int64)
144 |
145 | # Normalize texture resolution across all materials & combine into a single large texture
146 | for tex in textures:
147 | if tex in materials[0]:
148 | tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2) # Lay out all textures horizontally, NHWC so dim2 is x
149 | tex_data = _upscale_replicate(tex_data, full_res)
150 | uber_material[tex] = texture.Texture2D(tex_data)
151 |
152 | # Compute scaling values for used / unused texture area
153 | s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]
154 |
155 |     # Recompute texture coordinates to coincide with the new composite texture
156 | new_tverts = {}
157 | new_tverts_data = []
158 | for fi in range(len(tfaces)):
159 | matIdx = mfaces[fi]
160 | for vi in range(3):
161 | ti = tfaces[fi][vi]
162 | if not (ti in new_tverts):
163 | new_tverts[ti] = {}
164 | if not (matIdx in new_tverts[ti]): # create new vertex
165 |                 new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]]) # Offset texture coordinate (x direction) by material id & scale to local space. Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here
166 | new_tverts[ti][matIdx] = len(new_tverts_data) - 1
167 | tfaces[fi][vi] = new_tverts[ti][matIdx] # reindex vertex
168 |
169 | return uber_material, new_tverts_data, tfaces
170 |
--------------------------------------------------------------------------------
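A hedged usage sketch (the .mtl path is a placeholder, assumed to sit next to the `data/spot/spot.obj` referenced in `configs/f3c.json`):

```
from render import material

mats = material.load_mtl("data/spot/spot.mtl")  # list of material dicts with 'kd'/'ks' textures
mat = material.create_trainable(mats[0])        # Texture2D entries become trainable copies
params = material.get_parameters(mat)           # tensors to hand to the optimizer
material.save_mtl("out/mesh.mtl", mat)
```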
/render/mlptexture.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 | import tinycudann as tcnn
12 | import numpy as np
13 |
14 | #######################################################################################################################################################
15 | # Small MLP using PyTorch primitives, internal helper class
16 | #######################################################################################################################################################
17 |
18 | class _MLP(torch.nn.Module):
19 | def __init__(self, cfg, loss_scale=1.0):
20 | super(_MLP, self).__init__()
21 | self.loss_scale = loss_scale
22 | net = (torch.nn.Linear(cfg['n_input_dims'], cfg['n_neurons'], bias=False), torch.nn.ReLU())
23 | for i in range(cfg['n_hidden_layers']-1):
24 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_neurons'], bias=False), torch.nn.ReLU())
25 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_output_dims'], bias=False),)
26 | self.net = torch.nn.Sequential(*net).cuda()
27 |
28 | self.net.apply(self._init_weights)
29 |
30 | if self.loss_scale != 1.0:
31 | self.net.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] * self.loss_scale, ))
32 |
33 | def forward(self, x):
34 | return self.net(x.to(torch.float32))
35 |
36 | @staticmethod
37 | def _init_weights(m):
38 | if type(m) == torch.nn.Linear:
39 | torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
40 | if hasattr(m.bias, 'data'):
41 | m.bias.data.fill_(0.0)
42 |
43 | ############################################
44 |
45 |
46 |
47 | #######################################################################################################################################################
48 | # Outward visible MLP class
49 | #######################################################################################################################################################
50 |
51 | class MLPTexture3D(torch.nn.Module):
52 | def __init__(self, AABB, channels = 3, internal_dims = 32, hidden = 2, min_max = None, use_float16=False):
53 | super(MLPTexture3D, self).__init__()
54 |
55 | self.channels = channels
56 | self.internal_dims = internal_dims
57 | self.AABB = AABB
58 | self.min_max = min_max
59 | self.use_float16 = use_float16
60 |
61 | # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details
62 | desired_resolution = 4096
63 | base_grid_resolution = 16
64 | num_levels = 16
65 | per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1))
66 |
67 |
68 | enc_cfg = {
69 | "otype": "HashGrid",
70 | "n_levels": 5, # 16
71 | "n_features_per_level": 2, #2
72 | "log2_hashmap_size": 21, # 21
73 | "base_resolution": base_grid_resolution, # 16
74 | "per_level_scale" : per_level_scale # 1.4472692374403782
75 | }
76 |
77 |
78 | gradient_scaling = 128.0
79 | self.encoder = tcnn.Encoding(3, enc_cfg)
80 |
81 | # Setup MLP
82 | mlp_cfg = {
83 | "n_input_dims" : self.encoder.n_output_dims,
84 | "n_output_dims" : self.channels,
85 | "n_hidden_layers" : hidden,
86 | "n_neurons" : self.internal_dims
87 | }
88 | self.net = _MLP(mlp_cfg, gradient_scaling)
89 | print("Encoder output: %d dims" % (self.encoder.n_output_dims))
90 |
91 | def sample(self, texc, frame_id):
92 |
93 | ###################################
94 | bbox = torch.tensor([0.6, 0.6, 0.2]).cuda(), torch.tensor([-0.8, -1.2, -0.2]).cuda()
95 | _texc = (texc.view(-1, 3) - bbox[0][None, ...]) / (bbox[1][None, ...] - bbox[0][None, ...])
96 | _texc = torch.clamp(_texc, min=0, max=1)
97 |
98 | p_enc = self.encoder(_texc.contiguous())
99 |
100 | with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16):
101 | out = self.net.forward(p_enc)
102 |
103 | # Sigmoid limit and scale to the allowed range
104 | out = torch.sigmoid(out) * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :]
105 |
106 |
107 | return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, c]
108 |
109 |
110 | # In-place clamp with no derivative to make sure values are in valid range after training
111 | def clamp_(self):
112 | pass
113 |
114 | def cleanup(self):
115 | tcnn.free_temporary_memory()
116 |
117 |
118 |
119 | class MeshTexture3D(torch.nn.Module):
120 | def __init__(self, v):
121 | super(MeshTexture3D, self).__init__() # required before assigning submodules such as self.mlp_material below
122 |
123 | def __get_load_Texture3d(self, mesh, FLAGS):
124 |
125 | kd_min, kd_max = torch.tensor(FLAGS.kd_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.kd_max, dtype=torch.float32, device='cuda')
126 | ks_min, ks_max = torch.tensor(FLAGS.ks_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.ks_max, dtype=torch.float32, device='cuda')
127 |
128 | mlp_min = torch.cat((kd_min[0:3], ks_min), dim=0)
129 | mlp_max = torch.cat((kd_max[0:3], ks_max), dim=0)
130 |
131 | self.mlp_material = MLPTexture3D(mesh.getAABB(), channels=6, min_max=[mlp_min, mlp_max], use_float16=FLAGS.use_float16)
132 |
133 |
134 |
135 |
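
Note: a minimal sketch of driving MLPTexture3D (toy AABB/min_max values assumed; requires CUDA and tinycudann; note that sample() ignores frame_id and self.AABB, using its hard-coded bbox instead):

    import torch

    aabb = (torch.tensor([-1.0, -1.0, -1.0], device='cuda'),
            torch.tensor([ 1.0,  1.0,  1.0], device='cuda'))
    min_max = [torch.zeros(6, device='cuda'), torch.ones(6, device='cuda')]  # per-channel output range
    tex = MLPTexture3D(aabb, channels=6, min_max=min_max)
    pts = torch.rand(1, 64, 64, 3, device='cuda')   # NHWC query positions
    out = tex.sample(pts, frame_id=None)            # (1, 64, 64, 6), sigmoid-limited to min_max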
--------------------------------------------------------------------------------
/render/optixutils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | from .ops import OptiXContext, optix_build_bvh, optix_env_shade, bilateral_denoiser
10 | __all__ = ["OptiXContext", "optix_build_bvh", "optix_env_shade", 'bilateral_denoiser']
11 |
--------------------------------------------------------------------------------
/render/optixutils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/render/optixutils/__pycache__/ops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/ops.cpython-38.pyc
--------------------------------------------------------------------------------
/render/optixutils/build/build.ninja:
--------------------------------------------------------------------------------
1 | ninja_required_version = 1.3
2 | cxx = c++
3 | nvcc = /usr/local/cuda/bin/nvcc
4 |
5 | cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH
6 | post_cflags =
7 | cuda_cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14
8 | cuda_post_cflags =
9 | cuda_dlink_post_cflags =
10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart
11 |
12 | rule compile
13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
14 | depfile = $out.d
15 | deps = gcc
16 |
17 | rule cuda_compile
18 | depfile = $out.d
19 | deps = gcc
20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags
21 |
22 |
23 |
24 | rule link
25 | command = $cxx $in $ldflags -o $out
26 |
27 | build denoising.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/optixutils/c_src/denoising.cu
28 | build optix_wrapper.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/optix_wrapper.cpp
29 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/torch_bindings.cpp
30 |
31 |
32 |
33 | build optixutils_plugin.so: link denoising.cuda.o optix_wrapper.o torch_bindings.o
34 |
35 | default optixutils_plugin.so
36 |
37 |
--------------------------------------------------------------------------------
/render/optixutils/build/denoising.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/denoising.cuda.o
--------------------------------------------------------------------------------
/render/optixutils/build/optix_wrapper.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optix_wrapper.o
--------------------------------------------------------------------------------
/render/optixutils/build/optixutils_plugin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optixutils_plugin.so
--------------------------------------------------------------------------------
/render/optixutils/build/torch_bindings.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/torch_bindings.o
--------------------------------------------------------------------------------
/render/optixutils/c_src/common.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | // Helper functions to do broadcast guarded fetches
12 | #if defined(__CUDACC__)
13 | template <typename T, typename U, typename... Args>
14 | static __device__ inline float3 fetch3(const T &tensor, U idx, Args... args) {
15 | return tensor.size(0) == 1 ? fetch3(tensor[0], args...) : fetch3(tensor[idx], args...);
16 | }
17 | template <typename T> static __device__ inline float3 fetch3(const T &tensor) {
18 | return tensor.size(0) == 1 ? make_float3(tensor[0], tensor[0], tensor[0]) : make_float3(tensor[0], tensor[1], tensor[2]);
19 | }
20 |
21 | template <typename T, typename U, typename... Args>
22 | static __device__ inline float2 fetch2(const T &tensor, U idx, Args... args) {
23 | return tensor.size(0) == 1 ? fetch2(tensor[0], args...) : fetch2(tensor[idx], args...);
24 | }
25 | template <typename T> static __device__ inline float2 fetch2(const T &tensor) {
26 | return tensor.size(0) == 1 ? make_float2(tensor[0], tensor[0]) : make_float2(tensor[0], tensor[1]);
27 | }
28 |
29 | #include "math_utils.h"
30 | #include "bsdf.h"
31 | #endif
32 |
33 | //------------------------------------------------------------------------------
34 | // CUDA error-checking macros
35 | //------------------------------------------------------------------------------
36 |
37 | #define CUDA_CHECK( call ) \
38 | do \
39 | { \
40 | cudaError_t error = call; \
41 | if( error != cudaSuccess ) \
42 | { \
43 | std::stringstream ss; \
44 | ss << "CUDA call (" << #call << " ) failed with error: '" \
45 | << cudaGetErrorString( error ) \
46 | << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \
47 | } \
48 | } while( 0 )
49 |
50 |
51 | #define OPTIX_CHECK( call ) \
52 | do \
53 | { \
54 | OptixResult res = call; \
55 | if( res != OPTIX_SUCCESS ) \
56 | { \
57 | std::stringstream ss; \
58 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \
59 | << __LINE__ << ")\n"; \
60 | } \
61 | } while( 0 )
62 |
63 | #define OPTIX_CHECK_LOG( call ) \
64 | do \
65 | { \
66 | OptixResult res = call; \
67 | const size_t sizeof_log_returned = sizeof_log; \
68 | sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */ \
69 | if( res != OPTIX_SUCCESS ) \
70 | { \
71 | std::stringstream ss; \
72 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \
73 | << __LINE__ << ")\nLog:\n" << log \
74 | << ( sizeof_log_returned > sizeof( log ) ? "<TRUNCATED>" : "" ) \
75 | << "\n"; \
76 | } \
77 | } while( 0 )
78 |
79 | #define NVRTC_CHECK_ERROR( func ) \
80 | do \
81 | { \
82 | nvrtcResult code = func; \
83 | if( code != NVRTC_SUCCESS ) \
84 | throw std::runtime_error( "ERROR: " __FILE__ "(): " + std::string( nvrtcGetErrorString( code ) ) ); \
85 | } while( 0 )
86 |
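
Note: fetch2/fetch3 implement broadcast-guarded indexing: any leading dimension of size 1 is broadcast rather than indexed, and a final size-1 dimension splats one scalar across all lanes. A Python analogue of the fetch3 recursion (illustrative only, not repo code):

    def fetch3(tensor, *indices):
        # Walk the leading indices, broadcasting size-1 dimensions like the CUDA helper
        t = tensor
        for i in indices:
            t = t[0] if t.shape[0] == 1 else t[i]
        # Innermost dimension: size 1 broadcasts one scalar to all three components
        return (t[0], t[0], t[0]) if t.shape[0] == 1 else (t[0], t[1], t[2])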
--------------------------------------------------------------------------------
/render/optixutils/c_src/denoising.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "common.h"
10 | #include "denoising.h"
11 |
12 | #define FLT_EPS 0.0001f
13 |
14 | __global__ void bilateral_denoiser_fwd_kernel(BilateralDenoiserParams params)
15 | {
16 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);
17 |
18 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
19 | return;
20 |
21 | // Fetch central tap
22 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
23 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);
24 |
25 | float variance = params.sigma * params.sigma;
26 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;
27 |
28 | float accum_w = 0.0f;
29 | float3 accum_col = make_float3(0.0f);
30 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
31 | {
32 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
33 | {
34 | // Compute tap coordinates, used for input activations and bilateral guides
35 | int32_t y = idx.y + fy;
36 | int32_t x = idx.x + fx;
37 |
38 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
39 | continue;
40 |
41 | // Fetch current tap
42 | float3 t_col = fetch3(params.col, idx.z, y, x);
43 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
44 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x);
45 |
46 | /////////////////////////////////////////////////////////
47 | // Compute bilateral weight
48 | /////////////////////////////////////////////////////////
49 |
50 | // Distance
51 | float dist_sqr = fx * fx + fy * fy;
52 | float dist = sqrtf(dist_sqr);
53 | float w_xy = expf(-dist_sqr / (2.0f * variance));
54 |
55 | // Normal
56 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);
57 |
58 | // Depth
59 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(c_zdz.y * dist, FLT_EPS)));
60 |
61 | float w = w_xy * w_normal * w_depth;
62 |
63 | accum_col = accum_col + t_col * w;
64 | accum_w += w;
65 | }
66 | }
67 |
68 | params.out[idx.z][idx.y][idx.x][0] = accum_col.x;
69 | params.out[idx.z][idx.y][idx.x][1] = accum_col.y;
70 | params.out[idx.z][idx.y][idx.x][2] = accum_col.z;
71 | params.out[idx.z][idx.y][idx.x][3] = max(accum_w, 0.0001f);
72 | }
73 |
74 | __global__ void bilateral_denoiser_bwd_kernel(BilateralDenoiserParams params)
75 | {
76 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);
77 |
78 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
79 | return;
80 |
81 | // Fetch central tap
82 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
83 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);
84 |
85 | float variance = params.sigma * params.sigma;
86 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;
87 |
88 | float3 accum_grad = make_float3(0.0f);
89 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
90 | {
91 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
92 | {
93 | // Compute tap coordinates, used for input activations and bilateral guides
94 | int32_t y = idx.y + fy;
95 | int32_t x = idx.x + fx;
96 |
97 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
98 | continue;
99 |
100 | // Fetch current tap
101 | float3 t_col = fetch3(params.col, idx.z, y, x);
102 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
103 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x);
104 |
105 | /////////////////////////////////////////////////////////
106 | // Compute bilateral weight
107 | /////////////////////////////////////////////////////////
108 |
109 | // Distance, transposing fx & fy doesn't affect distance
110 | float dist_sqr = fx * fx + fy * fy;
111 | float dist = sqrtf(dist_sqr);
112 | float w_xy = expf(-dist_sqr / (2.0f * variance));
113 |
114 | // Normal, transpose c_ and t_ (it's symmetric so doesn't matter)
115 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);
116 |
117 | // Depth, transpose c_ and t_ (matters for the denominator)
118 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(t_zdz.y * dist, FLT_EPS)));
119 |
120 | float w = w_xy * w_normal * w_depth;
121 |
122 | float3 t_col_grad = w * fetch3(params.out_grad, idx.z, y, x);
123 | accum_grad += t_col_grad;
124 | }
125 | }
126 |
127 | params.col_grad[idx.z][idx.y][idx.x][0] = accum_grad.x;
128 | params.col_grad[idx.z][idx.y][idx.x][1] = accum_grad.y;
129 | params.col_grad[idx.z][idx.y][idx.x][2] = accum_grad.z;
130 | }
131 |
--------------------------------------------------------------------------------
/render/optixutils/c_src/denoising.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include "accessor.h"
11 |
12 | struct BilateralDenoiserParams
13 | {
14 | PackedTensorAccessor32<float, 4> col;
15 | PackedTensorAccessor32<float, 4> col_grad;
16 | PackedTensorAccessor32<float, 4> nrm;
17 | PackedTensorAccessor32<float, 4> zdz;
18 | PackedTensorAccessor32<float, 4> out;
19 | PackedTensorAccessor32<float, 4> out_grad;
20 | float sigma;
21 | };
22 |
--------------------------------------------------------------------------------
/render/optixutils/c_src/envsampling/params.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include "../accessor.h"
10 |
11 | struct EnvSamplingParams
12 | {
13 | // Ray data
14 | PackedTensorAccessor32<float, 2> ro; // ray origin
15 |
16 | // GBuffer
17 | PackedTensorAccessor32<bool, 3> mask;
18 | PackedTensorAccessor32<float, 4> gb_pos;
19 | PackedTensorAccessor32<float, 4> gb_pos_grad;
20 | PackedTensorAccessor32<float, 4> gb_normal;
21 | PackedTensorAccessor32<float, 4> gb_normal_grad;
22 | PackedTensorAccessor32<float, 4> gb_view_pos;
23 | PackedTensorAccessor32<float, 4> gb_kd;
24 | PackedTensorAccessor32<float, 4> gb_kd_grad;
25 | PackedTensorAccessor32<float, 4> gb_ks;
26 | PackedTensorAccessor32<float, 4> gb_ks_grad;
27 |
28 | // Light
29 | PackedTensorAccessor32<float, 3> light;
30 | PackedTensorAccessor32<float, 3> light_grad;
31 | PackedTensorAccessor32<float, 2> pdf; // light pdf
32 | PackedTensorAccessor32<float, 1> rows; // light sampling cdf
33 | PackedTensorAccessor32<float, 2> cols; // light sampling cdf
34 |
35 | // Output
36 | PackedTensorAccessor32<float, 4> diff;
37 | PackedTensorAccessor32<float, 4> diff_grad;
38 | PackedTensorAccessor32<float, 4> spec;
39 | PackedTensorAccessor32<float, 4> spec_grad;
40 |
41 | // Table with random permutations for stratified sampling
42 | PackedTensorAccessor32<int, 2> perms;
43 |
44 | OptixTraversableHandle handle;
45 | unsigned int BSDF;
46 | unsigned int n_samples_x;
47 | unsigned int rnd_seed;
48 | unsigned int backward;
49 | float shadow_scale;
50 | };
--------------------------------------------------------------------------------
/render/optixutils/c_src/optix_wrapper.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 |
11 | #include <optix.h>
12 | #include <string>
13 |
14 | //------------------------------------------------------------------------
15 | // Python OptiX state wrapper.
16 |
17 | struct OptiXState
18 | {
19 | OptixDeviceContext context;
20 | OptixTraversableHandle gas_handle;
21 | CUdeviceptr d_gas_output_buffer;
22 |
23 | // Differentiable env sampling
24 | OptixPipeline pipelineEnvSampling;
25 | OptixShaderBindingTable sbtEnvSampling;
26 | OptixModule moduleEnvSampling;
27 | };
28 |
29 |
30 | class OptiXStateWrapper
31 | {
32 | public:
33 | OptiXStateWrapper (const std::string &path, const std::string &cuda_path);
34 | ~OptiXStateWrapper (void);
35 |
36 | OptiXState* pState;
37 | };
38 |
39 |
--------------------------------------------------------------------------------
/render/optixutils/include/optix.h:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
4 | *
5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
6 | * rights in and to this software, related documentation and any modifications thereto.
7 | * Any use, reproduction, disclosure or distribution of this software and related
8 | * documentation without an express license agreement from NVIDIA Corporation is strictly
9 | * prohibited.
10 | *
11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 | * SUCH DAMAGES
20 | */
21 |
22 | /// @file
23 | /// @author NVIDIA Corporation
24 | /// @brief OptiX public API header
25 | ///
26 | /// Includes the host api if compiling host code, includes the cuda api if compiling device code.
27 | /// For the math library routines include optix_math.h
28 |
29 | #ifndef __optix_optix_h__
30 | #define __optix_optix_h__
31 |
32 | /// The OptiX version.
33 | ///
34 | /// - major = OPTIX_VERSION/10000
35 | /// - minor = (OPTIX_VERSION%10000)/100
36 | /// - micro = OPTIX_VERSION%100
37 | #define OPTIX_VERSION 70300
38 |
39 |
40 | #ifdef __CUDACC__
41 | #include "optix_device.h"
42 | #else
43 | #include "optix_host.h"
44 | #endif
45 |
46 |
47 | #endif // __optix_optix_h__
48 |
--------------------------------------------------------------------------------
/render/optixutils/include/optix_device.h:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
4 | *
5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
6 | * rights in and to this software, related documentation and any modifications thereto.
7 | * Any use, reproduction, disclosure or distribution of this software and related
8 | * documentation without an express license agreement from NVIDIA Corporation is strictly
9 | * prohibited.
10 | *
11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 | * SUCH DAMAGES
20 | */
21 |
22 | /**
23 | * @file optix_device.h
24 | * @author NVIDIA Corporation
25 | * @brief OptiX public API
26 | *
27 | * OptiX public API Reference - Host/Device side
28 | */
29 |
30 | /******************************************************************************\
31 | * optix_cuda.h
32 | *
33 | * This file provides the nvcc interface for generating PTX that the OptiX is
34 | * capable of parsing and weaving into the final kernel. This is included by
35 | * optix.h automatically if compiling device code. It can be included explicitly
36 | * in host code if desired.
37 | *
38 | \******************************************************************************/
39 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
40 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__
41 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__
42 | #endif
43 | #include "optix_7_device.h"
44 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ )
45 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
46 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__
47 | #endif
48 |
--------------------------------------------------------------------------------
/render/optixutils/include/optix_function_table_definition.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
3 | *
4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
5 | * rights in and to this software, related documentation and any modifications thereto.
6 | * Any use, reproduction, disclosure or distribution of this software and related
7 | * documentation without an express license agreement from NVIDIA Corporation is strictly
8 | * prohibited.
9 | *
10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
13 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
18 | * SUCH DAMAGES
19 | */
20 |
21 | /// @file
22 | /// @author NVIDIA Corporation
23 | /// @brief OptiX public API header
24 |
25 | #ifndef __optix_optix_function_table_definition_h__
26 | #define __optix_optix_function_table_definition_h__
27 |
28 | #include "optix_function_table.h"
29 |
30 | #ifdef __cplusplus
31 | extern "C" {
32 | #endif
33 |
34 | /** \addtogroup optix_function_table
35 | @{
36 | */
37 |
38 | /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly
39 | /// one translation unit. This can be achieved by including this header file in that translation
40 | /// unit.
41 | OptixFunctionTable g_optixFunctionTable;
42 |
43 | /*@}*/ // end group optix_function_table
44 |
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 |
49 | #endif // __optix_optix_function_table_definition_h__
50 |
--------------------------------------------------------------------------------
/render/optixutils/include/optix_host.h:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
4 | *
5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
6 | * rights in and to this software, related documentation and any modifications thereto.
7 | * Any use, reproduction, disclosure or distribution of this software and related
8 | * documentation without an express license agreement from NVIDIA Corporation is strictly
9 | * prohibited.
10 | *
11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 | * SUCH DAMAGES
20 | */
21 |
22 | /**
23 | * @file optix_host.h
24 | * @author NVIDIA Corporation
25 | * @brief OptiX public API
26 | *
27 | * OptiX public API Reference - Host side
28 | */
29 |
30 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
31 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__
32 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__
33 | #endif
34 | #include "optix_7_host.h"
35 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ )
36 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
37 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__
38 | #endif
39 |
--------------------------------------------------------------------------------
/render/optixutils/include/optix_types.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
3 | *
4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
5 | * rights in and to this software, related documentation and any modifications thereto.
6 | * Any use, reproduction, disclosure or distribution of this software and related
7 | * documentation without an express license agreement from NVIDIA Corporation is strictly
8 | * prohibited.
9 | *
10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
13 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
18 | * SUCH DAMAGES
19 | */
20 |
21 | /**
22 | * @file optix_types.h
23 | * @author NVIDIA Corporation
24 | * @brief OptiX public API header
25 | *
26 | */
27 |
28 | #ifndef __optix_optix_types_h__
29 | #define __optix_optix_types_h__
30 |
31 | // clang-format off
32 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
33 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__
34 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__
35 | #endif
36 | #include "optix_7_types.h"
37 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ )
38 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
39 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__
40 | #endif
41 | // clang-format on
42 |
43 | #endif // #ifndef __optix_optix_types_h__
44 |
--------------------------------------------------------------------------------
/render/optixutils/ops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import numpy as np
10 | import os
11 | import sys
12 | import torch
13 | import torch.utils.cpp_extension
14 |
15 | #----------------------------------------------------------------------------
16 | # C++/Cuda plugin compiler/loader.
17 |
18 | _plugin = None
19 | if _plugin is None:
20 |
21 | # Make sure we can find the necessary compiler and library binaries.
22 | if os.name == 'nt':
23 | optix_include_dir = os.path.dirname(__file__) + r"\include"
24 |
25 | def find_cl_path():
26 | import glob
27 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']:
28 | vs_editions = glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) \
29 | + glob.glob(r"C:\Program Files\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition)
30 | paths = sorted(vs_editions, reverse=True)
31 | if paths:
32 | return paths[0]
33 |
34 | # If cl.exe is not on path, try to find it.
35 | if os.system("where cl.exe >nul 2>nul") != 0:
36 | cl_path = find_cl_path()
37 | if cl_path is None:
38 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
39 | os.environ['PATH'] += ';' + cl_path
40 |
41 | elif os.name == 'posix':
42 | optix_include_dir = os.path.dirname(__file__) + r"/include"
43 |
44 | include_paths = [optix_include_dir]
45 |
46 | # Compiler options.
47 | opts = ['-DNVDR_TORCH']
48 |
49 | # Linker options.
50 | if os.name == 'posix':
51 | ldflags = ['-lcuda', '-lnvrtc']
52 | elif os.name == 'nt':
53 | ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib']
54 |
55 | # List of sources.
56 | source_files = [
57 | 'c_src/denoising.cu',
58 | 'c_src/optix_wrapper.cpp',
59 | 'c_src/torch_bindings.cpp'
60 | ]
61 |
62 | # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine.
63 | os.environ['TORCH_CUDA_ARCH_LIST'] = ''
64 |
65 | # Compile and load.
66 | build_dir = os.path.join(os.path.dirname(__file__), 'build')
67 | os.makedirs(build_dir, exist_ok=True)
68 | source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files]
69 | torch.utils.cpp_extension.load(name='optixutils_plugin', sources=source_paths, extra_cflags=opts,
70 | build_directory=build_dir,
71 | extra_cuda_cflags=opts, extra_ldflags=ldflags, extra_include_paths=include_paths, with_cuda=True, verbose=True)
72 |
73 | # Import, cache, and return the compiled module.
74 | import optixutils_plugin
75 | _plugin = optixutils_plugin
76 |
77 | #----------------------------------------------------------------------------
78 | # OptiX autograd func
79 | #----------------------------------------------------------------------------
80 |
81 | class _optix_env_shade_func(torch.autograd.Function):
82 | _random_perm = {}
83 |
84 | @staticmethod
85 | def forward(ctx, optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF, n_samples_x, rnd_seed, shadow_scale):
86 | _rnd_seed = np.random.randint(2**31) if rnd_seed is None else rnd_seed
87 | if n_samples_x not in _optix_env_shade_func._random_perm:
88 | # Generate (32k) tables with random permutations to decorrelate the BSDF and light stratified samples
89 | _optix_env_shade_func._random_perm[n_samples_x] = torch.argsort(torch.rand(32768, n_samples_x * n_samples_x, device="cuda"), dim=-1).int()
90 |
91 | diff, spec = _plugin.env_shade_fwd(optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[n_samples_x], BSDF, n_samples_x, _rnd_seed, shadow_scale)
92 | ctx.save_for_backward(mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols)
93 | ctx.optix_ctx = optix_ctx
94 | ctx.BSDF = BSDF
95 | ctx.n_samples_x = n_samples_x
96 | ctx.rnd_seed = rnd_seed
97 | ctx.shadow_scale = shadow_scale
98 | return diff, spec
99 |
100 | @staticmethod
101 | def backward(ctx, diff_grad, spec_grad):
102 | optix_ctx = ctx.optix_ctx
103 | _rnd_seed = np.random.randint(2**31) if ctx.rnd_seed is None else ctx.rnd_seed
104 | mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols = ctx.saved_variables
105 | gb_pos_grad, gb_normal_grad, gb_kd_grad, gb_ks_grad, light_grad = _plugin.env_shade_bwd(
106 | optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[ctx.n_samples_x],
107 | ctx.BSDF, ctx.n_samples_x, _rnd_seed, ctx.shadow_scale, diff_grad, spec_grad)
108 | return None, None, None, gb_pos_grad, gb_normal_grad, None, gb_kd_grad, gb_ks_grad, light_grad, None, None, None, None, None, None, None
109 |
110 | class _bilateral_denoiser_func(torch.autograd.Function):
111 | @staticmethod
112 | def forward(ctx, col, nrm, zdz, sigma):
113 | ctx.save_for_backward(col, nrm, zdz)
114 | ctx.sigma = sigma
115 | out = _plugin.bilateral_denoiser_fwd(col, nrm, zdz, sigma)
116 | return out
117 |
118 | @staticmethod
119 | def backward(ctx, out_grad):
120 | col, nrm, zdz = ctx.saved_variables
121 | col_grad = _plugin.bilateral_denoiser_bwd(col, nrm, zdz, ctx.sigma, out_grad)
122 | return col_grad, None, None, None
123 |
124 | #----------------------------------------------------------------------------
125 | # OptiX ray tracing utils
126 | #----------------------------------------------------------------------------
127 |
128 | class OptiXContext:
129 | def __init__(self):
130 | print("Cuda path", torch.utils.cpp_extension.CUDA_HOME)
131 | self.cpp_wrapper = _plugin.OptiXStateWrapper(os.path.dirname(__file__), torch.utils.cpp_extension.CUDA_HOME)
132 |
133 | def optix_build_bvh(optix_ctx, verts, tris, rebuild):
134 | '''
135 | Deliberately does not assert on empty meshes, since we may have msdf supervision; should be cleaned up later.
136 | '''
137 | # assert tris.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)"
138 | # assert verts.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)"
139 | _plugin.optix_build_bvh(optix_ctx.cpp_wrapper, verts.view(-1, 3), tris.view(-1, 3), rebuild)
140 |
141 | def optix_env_shade(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF='pbr', n_samples_x=8, rnd_seed=None, shadow_scale=1.0):
142 | iBSDF = ['pbr', 'diffuse', 'white'].index(BSDF) # Ordering important, must match the order of the fwd/bwdPbrBSDF kernel.
143 | return _optix_env_shade_func.apply(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, iBSDF, n_samples_x, rnd_seed, shadow_scale)
144 |
145 | def bilateral_denoiser(col, nrm, zdz, sigma):
146 | col_w = _bilateral_denoiser_func.apply(col, nrm, zdz, sigma)
147 | return col_w[..., 0:3] / col_w[..., 3:4]
148 |
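
Note: a minimal sketch of calling the compiled plugin through the wrappers above (toy buffers; assumes the extension built successfully and a CUDA device is available):

    import torch
    import render.optixutils as ou  # import path assumed from this repo's layout

    col = torch.rand(1, 128, 128, 3, device='cuda')   # noisy color, NHWC
    nrm = torch.nn.functional.normalize(torch.rand(1, 128, 128, 3, device='cuda') - 0.5, dim=-1)
    zdz = torch.rand(1, 128, 128, 2, device='cuda')   # (depth, depth gradient) per pixel
    out = ou.bilateral_denoiser(col, nrm, zdz, sigma=2.0)  # -> (1, 128, 128, 3)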
--------------------------------------------------------------------------------
/render/optixutils/tests/filter_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 |
10 | import torch
11 |
12 | import os
13 | import sys
14 | import math
15 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
16 | import optixutils as ou
17 | import numpy as np
18 |
19 | RES = 1024
20 | DTYPE = torch.float32
21 |
22 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
23 | return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN
24 |
25 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
26 | return x / length(x, eps)
27 |
28 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
29 | return torch.sum(x*y, -1, keepdim=True)
30 |
31 | class BilateralDenoiser(torch.nn.Module):
32 | def __init__(self, sigma=1.0):
33 | super(BilateralDenoiser, self).__init__()
34 | self.set_sigma(sigma)
35 |
36 | def set_sigma(self, sigma):
37 | self.sigma = max(sigma, 0.0001)
38 | self.variance = self.sigma**2.
39 | self.N = 2 * math.ceil(self.sigma * 2.5) + 1
40 |
41 | def forward(self, input):
42 | eps = 0.0001
43 | col = input[..., 0:3]
44 | nrm = input[..., 3:6]
45 | kd = input[..., 6:9]
46 | zdz = input[..., 9:11]
47 |
48 | accum_col = torch.zeros_like(col)
49 | accum_w = torch.zeros_like(col[..., 0:1])
50 | for y in range(-self.N, self.N+1):
51 | for x in range(-self.N, self.N+1):
52 |
53 | ty, tx = torch.meshgrid(torch.arange(0, input.shape[1], dtype=torch.float32, device="cuda"), torch.arange(0, input.shape[2], dtype=torch.float32, device="cuda"), indexing='ij')
54 | tx = tx[None, ..., None] + x
55 | ty = ty[None, ..., None] + y
56 |
57 | dist_sqr = (x**2 + y**2)
58 | dist = np.sqrt(dist_sqr)
59 | w_xy = np.exp(-dist_sqr / (2 * self.variance))
60 |
61 | with torch.no_grad():
62 | nrm_tap = torch.roll(nrm, (-y, -x), (1, 2))
63 | w_normal = torch.pow(torch.clamp(dot(nrm_tap, nrm), min=eps, max=1.0), 128.0) # From SVGF
64 |
65 | zdz_tap = torch.roll(zdz, (-y, -x), (1, 2))
66 | w_depth = torch.exp(-(torch.abs(zdz_tap[..., 0:1] - zdz[..., 0:1]) / torch.clamp(zdz[..., 1:2] * dist, min=eps)) ) # From SVGF
67 |
68 | w = w_xy * w_normal * w_depth
69 | w = torch.where((tx >= 0) & (tx < input.shape[2]) & (ty >= 0) & (ty < input.shape[1]), w, torch.zeros_like(w))
70 |
71 | col_tap = torch.roll(col, (-y, -x), (1, 2))
72 | accum_col += col_tap * w
73 | accum_w += w
74 | return accum_col / torch.clamp(accum_w, min=eps)
75 |
76 | def relative_loss(name, ref, cuda):
77 | ref = ref.float()
78 | cuda = cuda.float()
79 | denom = torch.where(ref > 1e-7, ref, torch.ones_like(ref))
80 | relative = torch.abs(ref - cuda) / denom
81 | print(name, torch.max(relative).item())
82 |
83 |
84 | def test_filter():
85 | img_cuda = torch.rand(1, RES, RES, 11, dtype=DTYPE, device='cuda')
86 | img_cuda[..., 3:6] = safe_normalize(img_cuda[..., 3:6])
87 | img_ref = img_cuda.clone().detach().requires_grad_(True)
88 | img_cuda = img_cuda.clone().detach().requires_grad_(True)
89 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
90 | target_ref = target_cuda.clone().detach().requires_grad_(True)
91 |
92 | SIGMA = 2.0
93 |
94 | start = torch.cuda.Event(enable_timing=True)
95 | end = torch.cuda.Event(enable_timing=True)
96 |
97 | start.record()
98 | denoiser = BilateralDenoiser(sigma=SIGMA)
99 | denoised_ref = denoiser.forward(img_ref)
100 | ref_loss = torch.nn.MSELoss()(denoised_ref, target_ref)
101 | ref_loss.backward()
102 | end.record()
103 | torch.cuda.synchronize()
104 | print("Python:", start.elapsed_time(end))
105 |
106 | start.record()
107 | denoised_cuda = ou.bilateral_denoiser(img_cuda[..., 0:3], img_cuda[..., 3:6], img_cuda[..., 9:11], SIGMA)  # kd (channels 6:9) is unused by the CUDA denoiser
108 | cuda_loss = torch.nn.MSELoss()(denoised_cuda, target_cuda)
109 | cuda_loss.backward()
110 | end.record()
111 | torch.cuda.synchronize()
112 | print("CUDA:", start.elapsed_time(end))
113 |
114 | print("-------------------------------------------------------------")
115 | print(" Filter loss:")
116 | print("-------------------------------------------------------------")
117 |
118 | relative_loss("denoised:", denoised_ref[..., 0:3], denoised_cuda[..., 0:3])
119 | relative_loss("grad:", img_ref.grad[..., 0:3], img_cuda.grad[..., 0:3])
120 |
121 | test_filter()
--------------------------------------------------------------------------------
/render/regularizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 | import nvdiffrast.torch as dr
12 | import torch.nn.functional as F
13 |
14 | from render import util
15 | from . import mesh
16 |
17 | def luma(x):
18 | return ((x[..., 0:1] + x[..., 1:2] + x[..., 2:3]) / 3).repeat(1, 1, 1, 3)
19 | def value(x):
20 | return torch.max(x[..., 0:3], dim=-1, keepdim=True)[0].repeat(1, 1, 1, 3)
21 |
22 | def chroma_loss(kd, color_ref, lambda_chroma):
23 | eps = 0.001
24 | ref_chroma = color_ref[..., 0:3] / torch.clip(value(color_ref), min=eps)
25 | opt_chroma = kd[..., 0:3] / torch.clip(value(kd), min=eps)
26 | return torch.mean(torch.abs((opt_chroma - ref_chroma) * color_ref[..., 3:])) * lambda_chroma
27 |
28 | # Diffuse luma regularizer + specular
29 | def shading_loss(diffuse_light, specular_light, color_ref, lambda_diffuse, lambda_specular):
30 | diffuse_luma = luma(diffuse_light)
31 | specular_luma = luma(specular_light)
32 | ref_luma = value(color_ref)
33 |
34 | eps = 0.001
35 | img = util.rgb_to_srgb(torch.log(torch.clamp((diffuse_luma + specular_luma) * color_ref[..., 3:], min=0, max=65535) + 1))
36 | target = util.rgb_to_srgb(torch.log(torch.clamp(ref_luma * color_ref[..., 3:], min=0, max=65535) + 1))
37 | # error = torch.abs(img - target) * diffuse_luma / torch.clamp(diffuse_luma + specular_luma, min=eps) ### encourage specular component to take control
38 | error = torch.abs(img - target) ### the original version in the paper
39 | loss = torch.mean(error) * lambda_diffuse
40 | loss += torch.mean(specular_luma) / torch.clamp(torch.mean(diffuse_luma), min=eps) * lambda_specular
41 | return loss
42 |
43 | ######################################################################################
44 | # Material smoothness loss
45 | ######################################################################################
46 |
47 | def material_smoothness_grad(kd_grad, ks_grad, nrm_grad, lambda_kd=0.25, lambda_ks=0.1, lambda_nrm=0.0):
48 | kd_luma_grad = (kd_grad[..., 0] + kd_grad[..., 1] + kd_grad[..., 2]) / 3
49 | loss = torch.mean(kd_luma_grad * kd_grad[..., -1]) * lambda_kd
50 | loss += torch.mean(ks_grad[..., :-1] * ks_grad[..., -1:]) * lambda_ks
51 | loss += torch.mean(nrm_grad[..., :-1] * nrm_grad[..., -1:]) * lambda_nrm
52 | return loss
53 |
54 | ######################################################################################
55 | # Computes the image gradient, useful for kd/ks smoothness losses
56 | ######################################################################################
57 | def image_grad(buf, std=0.01):
58 | t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / buf.shape[1], 1.0 - 1.0 / buf.shape[1], buf.shape[1], device="cuda"),
59 | torch.linspace(-1.0 + 1.0 / buf.shape[2], 1.0 - 1.0 / buf.shape[2], buf.shape[2], device="cuda"),
60 | indexing='ij')
61 | tc = torch.normal(mean=0, std=std, size=(buf.shape[0], buf.shape[1], buf.shape[2], 2), device="cuda") + torch.stack((s, t), dim=-1)[None, ...]
62 | tap = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp')
63 | return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:]
64 |
65 | ######################################################################################
66 | # Computes the average edge length of a mesh.
67 | # Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients
68 | ######################################################################################
69 | def avg_edge_length(v_pos, t_pos_idx):
70 | e_pos_idx = mesh.compute_edges(t_pos_idx)
71 | edge_len = util.length(v_pos[e_pos_idx[:, 0]] - v_pos[e_pos_idx[:, 1]])
72 | return torch.mean(edge_len)
73 |
74 | ######################################################################################
75 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun).
76 | # https://mgarland.org/class/geom04/material/smoothing.pdf
77 | ######################################################################################
78 | def laplace_regularizer_const(v_pos, t_pos_idx):
79 | term = torch.zeros_like(v_pos)
80 | norm = torch.zeros_like(v_pos[..., 0:1])
81 |
82 | v0 = v_pos[t_pos_idx[:, 0], :]
83 | v1 = v_pos[t_pos_idx[:, 1], :]
84 | v2 = v_pos[t_pos_idx[:, 2], :]
85 |
86 | term.scatter_add_(0, t_pos_idx[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0))
87 | term.scatter_add_(0, t_pos_idx[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1))
88 | term.scatter_add_(0, t_pos_idx[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2))
89 |
90 | two = torch.ones_like(v0) * 2.0
91 | norm.scatter_add_(0, t_pos_idx[:, 0:1], two)
92 | norm.scatter_add_(0, t_pos_idx[:, 1:2], two)
93 | norm.scatter_add_(0, t_pos_idx[:, 2:3], two)
94 |
95 | term = term / torch.clamp(norm, min=1.0)
96 |
97 | return torch.mean(term**2)
98 |
99 | ######################################################################################
100 | # Smooth vertex normals
101 | ######################################################################################
102 | def normal_consistency(v_pos, t_pos_idx):
103 | # Compute face normals
104 | v0 = v_pos[t_pos_idx[:, 0], :]
105 | v1 = v_pos[t_pos_idx[:, 1], :]
106 | v2 = v_pos[t_pos_idx[:, 2], :]
107 |
108 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
109 |
110 | tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx)
111 |
112 | # Fetch normals for both faces sharing an edge
113 | n0 = face_normals[tris_per_edge[:, 0], :]
114 | n1 = face_normals[tris_per_edge[:, 1], :]
115 |
116 | # Compute error metric based on normal difference
117 | term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
118 | term = (1.0 - term) * 0.5
119 |
120 | return torch.mean(torch.abs(term))
121 |
122 | def _ssim_window(window_size, sigma, channels, device):
123 |     # Separable Gaussian window, normalized to sum to 1 (sigma=1.5 is the conventional SSIM choice)
124 |     coords = torch.arange(window_size, dtype=torch.float32, device=device) - window_size // 2
125 |     g = torch.exp(-(coords**2) / (2 * sigma**2))
126 |     g = (g / g.sum()).unsqueeze(0)
127 |     kernel = (g.t() @ g)[None, None, ...]
128 |     return kernel.expand(channels, 1, window_size, window_size).contiguous()
129 |
130 | def ssim_loss(pred, target):
131 |     # Window size
132 |     window_size = 11
133 |     # Gaussian weighting window (`window` was previously undefined here)
134 |     window = _ssim_window(window_size, 1.5, 3, pred.device)
135 |     # Means
136 |     mu_x = F.conv2d(pred, window, padding=window_size//2, groups=3)
137 |     mu_y = F.conv2d(target, window, padding=window_size//2, groups=3)
138 |
139 |     # Variances and covariance
140 |     sigma_x = F.conv2d(pred**2, window, padding=window_size//2, groups=3) - mu_x**2
141 |     sigma_y = F.conv2d(target**2, window, padding=window_size//2, groups=3) - mu_y**2
142 |     sigma_xy = F.conv2d(pred*target, window, padding=window_size//2, groups=3) - mu_x*mu_y
143 |
144 |     # Constants for numerical stability
145 |     C1 = 0.01**2
146 |     C2 = 0.03**2
147 |
148 |     # Compute SSIM
149 |     ssim = ((2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)) / ((mu_x**2 + mu_y**2 + C1) * (sigma_x + sigma_y + C2))
150 |
151 |     return 1 - ssim.mean()
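
Note: a toy sanity check for ssim_loss (standalone, not repo code; NCHW float input with 3 channels assumed):

    import torch
    pred = torch.rand(1, 3, 64, 64)
    print(ssim_loss(pred, pred.clone()).item())  # ~0.0 for identical images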
--------------------------------------------------------------------------------
/render/renderutils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith
11 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ]
12 |
--------------------------------------------------------------------------------
/render/renderutils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/render/renderutils/__pycache__/bsdf.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/bsdf.cpython-38.pyc
--------------------------------------------------------------------------------
/render/renderutils/__pycache__/loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/loss.cpython-38.pyc
--------------------------------------------------------------------------------
/render/renderutils/__pycache__/ops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/ops.cpython-38.pyc
--------------------------------------------------------------------------------
/render/renderutils/bsdf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import math
11 | import torch
12 |
13 | NORMAL_THRESHOLD = 0.1
14 |
15 | ################################################################################
16 | # Vector utility functions
17 | ################################################################################
18 |
19 | def _dot(x, y):
20 | return torch.sum(x*y, -1, keepdim=True)
21 |
22 | def _reflect(x, n):
23 | return 2*_dot(x, n)*n - x
24 |
25 | def _safe_normalize(x):
26 | return torch.nn.functional.normalize(x, dim = -1)
27 |
28 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading):
29 | # Swap normal direction for backfacing surfaces
30 | if two_sided_shading:
31 | smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm)
32 | geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm)
33 |
34 | t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1)
35 | return torch.lerp(geom_nrm, smooth_nrm, t)
36 |
37 |
38 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl):
39 | smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm))
40 | if opengl:
41 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
42 | else:
43 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
44 | return _safe_normalize(shading_nrm)
45 |
46 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
47 | smooth_nrm = _safe_normalize(smooth_nrm)
48 | smooth_tng = _safe_normalize(smooth_tng)
49 | view_vec = _safe_normalize(view_pos - pos)
50 | shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl)
51 | return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading)
52 |
53 | ################################################################################
54 | # Simple lambertian diffuse BSDF
55 | ################################################################################
56 |
57 | def bsdf_lambert(nrm, wi):
58 | return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi
59 |
60 | ################################################################################
61 | # Frostbite diffuse
62 | ################################################################################
63 |
64 | def bsdf_frostbite(nrm, wi, wo, linearRoughness):
65 | wiDotN = _dot(wi, nrm)
66 | woDotN = _dot(wo, nrm)
67 |
68 | h = _safe_normalize(wo + wi)
69 | wiDotH = _dot(wi, h)
70 |
71 | energyBias = 0.5 * linearRoughness
72 | energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness
73 | f90 = energyBias + 2.0 * wiDotH * wiDotH * linearRoughness
74 | f0 = 1.0
75 |
76 | wiScatter = bsdf_fresnel_shlick(f0, f90, wiDotN)
77 | woScatter = bsdf_fresnel_shlick(f0, f90, woDotN)
78 | res = wiScatter * woScatter * energyFactor
79 | return torch.where((wiDotN > 0.0) & (woDotN > 0.0), res, torch.zeros_like(res))
80 |
81 | ################################################################################
82 | # Phong specular, loosely based on mitsuba implementation
83 | ################################################################################
84 |
85 | def bsdf_phong(nrm, wo, wi, N):
86 | dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0)
87 | dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0)
88 | return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi)
89 |
90 | ################################################################################
91 | # PBR's implementation of GGX specular
92 | ################################################################################
93 |
94 | specular_epsilon = 1e-4
95 |
96 | def bsdf_fresnel_shlick(f0, f90, cosTheta):
97 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
98 | return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0
99 |
100 | def bsdf_ndf_ggx(alphaSqr, cosTheta):
101 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
102 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1
103 | return alphaSqr / (d * d * math.pi)
104 |
105 | def bsdf_lambda_ggx(alphaSqr, cosTheta):
106 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
107 | cosThetaSqr = _cosTheta * _cosTheta
108 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr
109 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0)
110 | return res
111 |
112 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO):
113 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI)
114 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO)
115 | return 1 / (1 + lambdaI + lambdaO)
116 |
117 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08):
118 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0)
119 | alphaSqr = _alpha * _alpha
120 |
121 | h = _safe_normalize(wo + wi)
122 | woDotN = _dot(wo, nrm)
123 | wiDotN = _dot(wi, nrm)
124 | woDotH = _dot(wo, h)
125 | nDotH = _dot(nrm, h)
126 |
127 | D = bsdf_ndf_ggx(alphaSqr, nDotH)
128 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN)
129 | F = bsdf_fresnel_shlick(col, 1, woDotH)
130 |
131 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon)
132 |
133 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon)
134 | return torch.where(frontfacing, w, torch.zeros_like(w))
135 |
136 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF):
137 | wo = _safe_normalize(view_pos - pos)
138 | wi = _safe_normalize(light_pos - pos)
139 |
140 | spec_str = arm[..., 0:1] # x component
141 | roughness = arm[..., 1:2] # y component
142 | metallic = arm[..., 2:3] # z component
143 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str)
144 | kd = kd * (1.0 - metallic)
145 |
146 | if BSDF == 0:
147 | diffuse = kd * bsdf_lambert(nrm, wi)
148 | else:
149 | diffuse = kd * bsdf_frostbite(nrm, wi, wo, roughness)
150 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness)
151 | return diffuse + specular
152 |
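A minimal sketch exercising the pure-PyTorch bsdf_pbr reference above (not repository code; the [batch, height, width, 3] shapes mirror the CUDA kernels, and the import path is hypothetical):

    import torch
    from bsdf import bsdf_pbr  # hypothetical import of the module above

    B, H, W = 1, 32, 32
    kd  = torch.rand(B, H, W, 3)   # diffuse albedo
    arm = torch.rand(B, H, W, 3)   # (spec_str, roughness, metallic)
    pos = torch.rand(B, H, W, 3)   # surface positions
    nrm = torch.nn.functional.normalize(torch.rand(B, H, W, 3) - 0.5, dim=-1)
    view_pos  = torch.tensor([0.0, 0.0, 3.0]).expand(B, H, W, 3)
    light_pos = torch.tensor([2.0, 2.0, 2.0]).expand(B, H, W, 3)

    rgb = bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08, BSDF=0)
    print(rgb.shape)  # torch.Size([1, 32, 32, 3])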
--------------------------------------------------------------------------------
/render/renderutils/build/bsdf.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/bsdf.cuda.o
--------------------------------------------------------------------------------
/render/renderutils/build/build.ninja:
--------------------------------------------------------------------------------
1 | ninja_required_version = 1.3
2 | cxx = c++
3 | nvcc = /usr/local/cuda/bin/nvcc
4 |
5 | cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH
6 | post_cflags =
7 | cuda_cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14
8 | cuda_post_cflags =
9 | cuda_dlink_post_cflags =
10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart
11 |
12 | rule compile
13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
14 | depfile = $out.d
15 | deps = gcc
16 |
17 | rule cuda_compile
18 | depfile = $out.d
19 | deps = gcc
20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags
21 |
22 |
23 |
24 | rule link
25 | command = $cxx $in $ldflags -o $out
26 |
27 | build mesh.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/mesh.cu
28 | build loss.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/loss.cu
29 | build bsdf.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/bsdf.cu
30 | build normal.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/normal.cu
31 | build cubemap.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/cubemap.cu
32 | build common.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/common.cpp
33 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/torch_bindings.cpp
34 |
35 |
36 |
37 | build renderutils_plugin.so: link mesh.cuda.o loss.cuda.o bsdf.cuda.o normal.cuda.o cubemap.cuda.o common.o torch_bindings.o
38 |
39 | default renderutils_plugin.so
40 |
41 |
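This ninja file matches what PyTorch's JIT extension builder emits; a sketch of how such a plugin is typically compiled and loaded at import time (an illustration of the mechanism only — the actual flags and logic live in ops.py):

    import os
    import torch.utils.cpp_extension

    sources = ['mesh.cu', 'loss.cu', 'bsdf.cu', 'normal.cu', 'cubemap.cu',
               'common.cpp', 'torch_bindings.cpp']
    base = os.path.join(os.path.dirname(__file__), 'c_src')

    # JIT-compiles the sources into renderutils_plugin.so, writing build.ninja
    plugin = torch.utils.cpp_extension.load(
        name='renderutils_plugin',
        sources=[os.path.join(base, f) for f in sources],
        extra_cflags=['-DNVDR_TORCH'],
        extra_cuda_cflags=['-DNVDR_TORCH'],
        with_cuda=True)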
--------------------------------------------------------------------------------
/render/renderutils/build/common.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/common.o
--------------------------------------------------------------------------------
/render/renderutils/build/cubemap.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/cubemap.cuda.o
--------------------------------------------------------------------------------
/render/renderutils/build/loss.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/loss.cuda.o
--------------------------------------------------------------------------------
/render/renderutils/build/mesh.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/mesh.cuda.o
--------------------------------------------------------------------------------
/render/renderutils/build/normal.cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/normal.cuda.o
--------------------------------------------------------------------------------
/render/renderutils/build/renderutils_plugin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/renderutils_plugin.so
--------------------------------------------------------------------------------
/render/renderutils/build/torch_bindings.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/torch_bindings.o
--------------------------------------------------------------------------------
/render/renderutils/c_src/bsdf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "common.h"
15 |
16 | struct LambertKernelParams
17 | {
18 | Tensor nrm;
19 | Tensor wi;
20 | Tensor out;
21 | dim3 gridSize;
22 | };
23 |
24 | struct FrostbiteDiffuseKernelParams
25 | {
26 | Tensor nrm;
27 | Tensor wi;
28 | Tensor wo;
29 | Tensor linearRoughness;
30 | Tensor out;
31 | dim3 gridSize;
32 | };
33 |
34 | struct FresnelShlickKernelParams
35 | {
36 | Tensor f0;
37 | Tensor f90;
38 | Tensor cosTheta;
39 | Tensor out;
40 | dim3 gridSize;
41 | };
42 |
43 | struct NdfGGXParams
44 | {
45 | Tensor alphaSqr;
46 | Tensor cosTheta;
47 | Tensor out;
48 | dim3 gridSize;
49 | };
50 |
51 | struct MaskingSmithParams
52 | {
53 | Tensor alphaSqr;
54 | Tensor cosThetaI;
55 | Tensor cosThetaO;
56 | Tensor out;
57 | dim3 gridSize;
58 | };
59 |
60 | struct PbrSpecular
61 | {
62 | Tensor col;
63 | Tensor nrm;
64 | Tensor wo;
65 | Tensor wi;
66 | Tensor alpha;
67 | Tensor out;
68 | dim3 gridSize;
69 | float min_roughness;
70 | };
71 |
72 | struct PbrBSDF
73 | {
74 | Tensor kd;
75 | Tensor arm;
76 | Tensor pos;
77 | Tensor nrm;
78 | Tensor view_pos;
79 | Tensor light_pos;
80 | Tensor out;
81 | dim3 gridSize;
82 | float min_roughness;
83 | int BSDF;
84 | };
85 |
--------------------------------------------------------------------------------
/render/renderutils/c_src/common.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #include <cuda_runtime.h>
13 | #include <algorithm>
14 |
15 | //------------------------------------------------------------------------
16 | // Block and grid size calculators for kernel launches.
17 |
18 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims)
19 | {
20 | int maxThreads = maxWidth * maxHeight;
21 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1)
22 | return dim3(1, 1, 1); // Degenerate.
23 |
24 | // Start from max size.
25 | int bw = maxWidth;
26 | int bh = maxHeight;
27 |
28 | // Optimizations for weirdly sized buffers.
29 | if (dims.x < bw)
30 | {
31 | // Decrease block width to smallest power of two that covers the buffer width.
32 | while ((bw >> 1) >= dims.x)
33 | bw >>= 1;
34 |
35 | // Maximize height.
36 | bh = maxThreads / bw;
37 | if (bh > dims.y)
38 | bh = dims.y;
39 | }
40 | else if (dims.y < bh)
41 | {
42 | // Halve height and double width until fits completely inside buffer vertically.
43 | while (bh > dims.y)
44 | {
45 | bh >>= 1;
46 | if (bw < dims.x)
47 | bw <<= 1;
48 | }
49 | }
50 |
51 | // Done.
52 | return dim3(bw, bh, 1);
53 | }
54 |
55 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g. __shfl_xor_sync)
56 | dim3 getWarpSize(dim3 blockSize)
57 | {
58 | return dim3(
59 | std::min(blockSize.x, 32u),
60 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)),
61 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z))
62 | );
63 | }
64 |
65 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims)
66 | {
67 | dim3 gridSize;
68 | gridSize.x = (dims.x - 1) / blockSize.x + 1;
69 | gridSize.y = (dims.y - 1) / blockSize.y + 1;
70 | gridSize.z = (dims.z - 1) / blockSize.z + 1;
71 | return gridSize;
72 | }
73 |
74 | //------------------------------------------------------------------------
75 |
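A quick check of the launch-size arithmetic above (illustration only): getLaunchGridSize is a ceiling division of the buffer dimensions by the block size, so edge pixels in partially filled blocks are still covered:

    def launch_grid_size(block, dims):
        # (d - 1) // b + 1 == ceil(d / b) for positive integers
        return tuple((d - 1) // b + 1 for b, d in zip(block, dims))

    print(launch_grid_size((8, 8, 1), (100, 50, 1)))  # (13, 7, 1): 13*8 >= 100, 7*8 >= 50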
--------------------------------------------------------------------------------
/render/renderutils/c_src/common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 | #include <cuda.h>
14 | #include <stdint.h>
15 |
16 | #include "vec3f.h"
17 | #include "vec4f.h"
18 | #include "tensor.h"
19 |
20 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims);
21 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims);
22 |
23 | #ifdef __CUDACC__
24 |
25 | #ifdef _MSC_VER
26 | #define M_PI 3.14159265358979323846f
27 | #endif
28 |
29 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
30 | {
31 | return dim3(
32 | min(blockSize.x, 32u),
33 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)),
34 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z))
35 | );
36 | }
37 |
38 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); }
39 | #else
40 | dim3 getWarpSize(dim3 blockSize);
41 | #endif
--------------------------------------------------------------------------------
/render/renderutils/c_src/cubemap.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "common.h"
15 |
16 | struct DiffuseCubemapKernelParams
17 | {
18 | Tensor cubemap;
19 | Tensor out;
20 | dim3 gridSize;
21 | };
22 |
23 | struct SpecularCubemapKernelParams
24 | {
25 | Tensor cubemap;
26 | Tensor bounds;
27 | Tensor out;
28 | dim3 gridSize;
29 | float costheta_cutoff;
30 | float roughness;
31 | };
32 |
33 | struct SpecularBoundsKernelParams
34 | {
35 | float costheta_cutoff;
36 | Tensor out;
37 | dim3 gridSize;
38 | };
39 |
--------------------------------------------------------------------------------
/render/renderutils/c_src/loss.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "common.h"
15 |
16 | enum TonemapperType
17 | {
18 | TONEMAPPER_NONE = 0,
19 | TONEMAPPER_LOG_SRGB = 1
20 | };
21 |
22 | enum LossType
23 | {
24 | LOSS_L1 = 0,
25 | LOSS_MSE = 1,
26 | LOSS_RELMSE = 2,
27 | LOSS_SMAPE = 3
28 | };
29 |
30 | struct LossKernelParams
31 | {
32 | Tensor img;
33 | Tensor target;
34 | Tensor out;
35 | dim3 gridSize;
36 | TonemapperType tonemapper;
37 | LossType loss;
38 | };
39 |
--------------------------------------------------------------------------------
/render/renderutils/c_src/mesh.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #include <cuda.h>
13 | #include <stdint.h>
14 |
15 | #include "common.h"
16 | #include "mesh.h"
17 |
18 |
19 | //------------------------------------------------------------------------
20 | // Kernels
21 |
22 | __global__ void xfmPointsFwdKernel(XfmKernelParams p)
23 | {
24 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
25 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
26 |
27 | __shared__ float mtx[4][4];
28 | if (threadIdx.x < 16)
29 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
30 | __syncthreads();
31 |
32 | if (px >= p.gridSize.x)
33 | return;
34 |
35 | vec3f pos(
36 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
37 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
38 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
39 | );
40 |
41 | if (p.isPoints)
42 | {
43 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]);
44 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]);
45 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]);
46 | p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]);
47 | }
48 | else
49 | {
50 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]);
51 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]);
52 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]);
53 | }
54 | }
55 |
56 | __global__ void xfmPointsBwdKernel(XfmKernelParams p)
57 | {
58 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
59 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
60 |
61 | __shared__ float mtx[4][4];
62 | if (threadIdx.x < 16)
63 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
64 | __syncthreads();
65 |
66 | if (px >= p.gridSize.x)
67 | return;
68 |
69 | vec3f pos(
70 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
71 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
72 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
73 | );
74 |
75 | vec4f d_out(
76 | p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
77 | p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
78 | p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
79 | p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0))
80 | );
81 |
82 | if (p.isPoints)
83 | {
84 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
85 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
86 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
87 | }
88 | else
89 | {
90 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
91 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
92 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
93 | }
94 | }
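
The kernels above apply a batched homogeneous transform (the shared-memory mtx holds the transposed matrix, so each output is matrix @ point). A pure-PyTorch restatement of the same math, as a sketch (the repository's own python fallback lives in ops.py):

    import torch

    def xfm_points_py(points, matrix):
        # points: [1 or B, N, 3], matrix: [B, 4, 4] -> [B, N, 4]
        ones = torch.ones_like(points[..., 0:1])
        return torch.matmul(torch.cat([points, ones], dim=-1),
                            matrix.transpose(1, 2))

    def xfm_vectors_py(vectors, matrix):
        # Vectors ignore translation: only the upper-left 3x3 block applies
        return torch.matmul(vectors, matrix[:, :3, :3].transpose(1, 2))

    pts = torch.rand(1, 5, 3)
    mtx = torch.rand(2, 4, 4)
    print(xfm_points_py(pts, mtx).shape)   # torch.Size([2, 5, 4])
    print(xfm_vectors_py(pts, mtx).shape)  # torch.Size([2, 5, 3])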
--------------------------------------------------------------------------------
/render/renderutils/c_src/mesh.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "common.h"
15 |
16 | struct XfmKernelParams
17 | {
18 | bool isPoints;
19 | Tensor points;
20 | Tensor matrix;
21 | Tensor out;
22 | dim3 gridSize;
23 | };
24 |
--------------------------------------------------------------------------------
/render/renderutils/c_src/normal.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #include "common.h"
13 | #include "normal.h"
14 |
15 | #define NORMAL_THRESHOLD 0.1f
16 |
17 | //------------------------------------------------------------------------
18 | // Perturb shading normal by tangent frame
19 |
20 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl)
21 | {
22 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm);
23 | vec3f smooth_bitng = safeNormalize(_smooth_bitng);
24 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);
25 | return safeNormalize(_shading_nrm);
26 | }
27 |
28 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl)
29 | {
30 | ////////////////////////////////////////////////////////////////////////
31 | // FWD
32 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm);
33 | vec3f smooth_bitng = safeNormalize(_smooth_bitng);
34 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);
35 |
36 | ////////////////////////////////////////////////////////////////////////
37 | // BWD
38 | vec3f d_shading_nrm(0);
39 | bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out);
40 |
41 | vec3f d_smooth_bitng(0);
42 |
43 | if (perturbed_nrm.z > 0.0f)
44 | {
45 | d_smooth_nrm += d_shading_nrm * perturbed_nrm.z;
46 | d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm);
47 | }
48 |
49 | d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y;
50 | d_perturbed_nrm.y += (opengl ? -1 : 1) * sum(d_shading_nrm * smooth_bitng);
51 |
52 | d_smooth_tng += d_shading_nrm * perturbed_nrm.x;
53 | d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng);
54 |
55 | vec3f d__smooth_bitng(0);
56 | bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng);
57 |
58 | bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng);
59 | }
60 |
61 | //------------------------------------------------------------------------
62 | #define bent_nrm_eps 0.001f
63 |
64 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm)
65 | {
66 | float dp = dot(view_vec, smooth_nrm);
67 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);
68 | return geom_nrm * (1.0f - t) + smooth_nrm * t;
69 | }
70 |
71 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out)
72 | {
73 | ////////////////////////////////////////////////////////////////////////
74 | // FWD
75 | float dp = dot(view_vec, smooth_nrm);
76 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);
77 |
78 | ////////////////////////////////////////////////////////////////////////
79 | // BWD
80 | if (dp > NORMAL_THRESHOLD)
81 | d_smooth_nrm += d_out;
82 | else
83 | {
84 | // geom_nrm * (1.0f - t) + smooth_nrm * t;
85 | d_geom_nrm += d_out * (1.0f - t);
86 | d_smooth_nrm += d_out * t;
87 | float d_t = sum(d_out * (smooth_nrm - geom_nrm));
88 |
89 | float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD;
90 |
91 | bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp);
92 | }
93 | }
94 |
95 | //------------------------------------------------------------------------
96 | // Kernels
97 |
98 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p)
99 | {
100 | // Calculate pixel position.
101 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
102 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
103 | unsigned int pz = blockIdx.z;
104 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
105 | return;
106 |
107 | vec3f pos = p.pos.fetch3(px, py, pz);
108 | vec3f view_pos = p.view_pos.fetch3(px, py, pz);
109 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
110 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
111 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
112 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
113 |
114 | vec3f smooth_nrm = safeNormalize(_smooth_nrm);
115 | vec3f smooth_tng = safeNormalize(_smooth_tng);
116 | vec3f view_vec = safeNormalize(view_pos - pos);
117 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);
118 |
119 | vec3f res;
120 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
121 | res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm);
122 | else
123 | res = fwdBendNormal(view_vec, shading_nrm, geom_nrm);
124 |
125 | p.out.store(px, py, pz, res);
126 | }
127 |
128 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p)
129 | {
130 | // Calculate pixel position.
131 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
132 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
133 | unsigned int pz = blockIdx.z;
134 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
135 | return;
136 |
137 | vec3f pos = p.pos.fetch3(px, py, pz);
138 | vec3f view_pos = p.view_pos.fetch3(px, py, pz);
139 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
140 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
141 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
142 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
143 | vec3f d_out = p.out.fetch3(px, py, pz);
144 |
145 | ///////////////////////////////////////////////////////////////////////////////////////////////////
146 | // FWD
147 |
148 | vec3f smooth_nrm = safeNormalize(_smooth_nrm);
149 | vec3f smooth_tng = safeNormalize(_smooth_tng);
150 | vec3f _view_vec = view_pos - pos;
151 | vec3f view_vec = safeNormalize(view_pos - pos);
152 |
153 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);
154 |
155 | ///////////////////////////////////////////////////////////////////////////////////////////////////
156 | // BWD
157 |
158 | vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0);
159 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
160 | {
161 | bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
162 | d_shading_nrm = -d_shading_nrm;
163 | d_geom_nrm = -d_geom_nrm;
164 | }
165 | else
166 | bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
167 |
168 | vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0);
169 | bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl);
170 |
171 | vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0);
172 | bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec);
173 | bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm);
174 | bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng);
175 |
176 | p.pos.store_grad(px, py, pz, -d__view_vec);
177 | p.view_pos.store_grad(px, py, pz, d__view_vec);
178 | p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm);
179 | p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm);
180 | p.smooth_tng.store_grad(px, py, pz, d__smooth_tng);
181 | p.geom_nrm.store_grad(px, py, pz, d_geom_nrm);
182 | }
--------------------------------------------------------------------------------
/render/renderutils/c_src/normal.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "common.h"
15 |
16 | struct PrepareShadingNormalKernelParams
17 | {
18 | Tensor pos;
19 | Tensor view_pos;
20 | Tensor perturbed_nrm;
21 | Tensor smooth_nrm;
22 | Tensor smooth_tng;
23 | Tensor geom_nrm;
24 | Tensor out;
25 | dim3 gridSize;
26 | bool two_sided_shading, opengl;
27 | };
28 |
--------------------------------------------------------------------------------
/render/renderutils/c_src/tensor.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 | #if defined(__CUDACC__) && defined(BFLOAT16)
14 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits
15 | #endif
16 |
17 | //---------------------------------------------------------------------------------
18 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16
19 |
20 | struct Tensor
21 | {
22 | void* val;
23 | void* d_val;
24 | int dims[4], _dims[4];
25 | int strides[4];
26 | bool fp16;
27 |
28 | #if defined(__CUDA__) && !defined(__CUDA_ARCH__)
29 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {}
30 | #endif
31 |
32 | #ifdef __CUDACC__
33 | // Helpers to index and read/write a single element
34 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; }
35 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); }
36 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; }
37 | #ifdef BFLOAT16
38 | __device__ inline float fetch(unsigned int idx) const { return fp16 ? __bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; }
39 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; }
40 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; }
41 | #else
42 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; }
43 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; }
44 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; }
45 | #endif
46 |
47 | //////////////////////////////////////////////////////////////////////////////////////////
48 | // Fetch, use broadcasting for tensor dimensions of size 1
49 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const
50 | {
51 | return fetch(nhwcIndex(z, y, x, 0));
52 | }
53 |
54 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const
55 | {
56 | return vec3f(
57 | fetch(nhwcIndex(z, y, x, 0)),
58 | fetch(nhwcIndex(z, y, x, 1)),
59 | fetch(nhwcIndex(z, y, x, 2))
60 | );
61 | }
62 |
63 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
64 | // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
65 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val)
66 | {
67 | store(_nhwcIndex(z, y, x, 0), _val);
68 | }
69 |
70 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
71 | {
72 | store(_nhwcIndex(z, y, x, 0), _val.x);
73 | store(_nhwcIndex(z, y, x, 1), _val.y);
74 | store(_nhwcIndex(z, y, x, 2), _val.z);
75 | }
76 |
77 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
78 | // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
79 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val)
80 | {
81 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val);
82 | }
83 |
84 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
85 | {
86 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x);
87 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y);
88 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z);
89 | }
90 | #endif
91 |
92 | };
93 |
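The broadcasting in nhwcIndex above (any dimension of size 1 contributes no stride) is what lets a single matrix or scalar tensor be shared across a batch; the same rule in a few lines of Python (illustration only):

    def nhwc_index(dims, strides, n, h, w, c):
        # Dimensions of size 1 broadcast: they add nothing to the offset
        idx = 0
        for d, s, i in zip(dims, strides, (n, h, w, c)):
            if d != 1:
                idx += i * s
        return idx

    # A [1, 4, 4, 1] tensor read as if it were batched: n is ignored
    dims, strides = (1, 4, 4, 1), (16, 4, 1, 1)
    print(nhwc_index(dims, strides, 0, 2, 3, 0))  # 11
    print(nhwc_index(dims, strides, 7, 2, 3, 0))  # 11, same element for any n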
--------------------------------------------------------------------------------
/render/renderutils/c_src/vec3f.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | struct vec3f
15 | {
16 | float x, y, z;
17 |
18 | #ifdef __CUDACC__
19 | __device__ vec3f() { }
20 | __device__ vec3f(float v) { x = v; y = v; z = v; }
21 | __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; }
22 | __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; }
23 |
24 | __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; }
25 | __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; }
26 | __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; }
27 | __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; }
28 | #endif
29 | };
30 |
31 | #ifdef __CUDACC__
32 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); }
33 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); }
34 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); }
35 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); }
36 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); }
37 |
38 | __device__ static inline float sum(vec3f a)
39 | {
40 | return a.x + a.y + a.z;
41 | }
42 |
43 | __device__ static inline vec3f cross(vec3f a, vec3f b)
44 | {
45 | vec3f out;
46 | out.x = a.y * b.z - a.z * b.y;
47 | out.y = a.z * b.x - a.x * b.z;
48 | out.z = a.x * b.y - a.y * b.x;
49 | return out;
50 | }
51 |
52 | __device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out)
53 | {
54 | d_a.x += d_out.z * b.y - d_out.y * b.z;
55 | d_a.y += d_out.x * b.z - d_out.z * b.x;
56 | d_a.z += d_out.y * b.x - d_out.x * b.y;
57 |
58 | d_b.x += d_out.y * a.z - d_out.z * a.y;
59 | d_b.y += d_out.z * a.x - d_out.x * a.z;
60 | d_b.z += d_out.x * a.y - d_out.y * a.x;
61 | }
62 |
63 | __device__ static inline float dot(vec3f a, vec3f b)
64 | {
65 | return a.x * b.x + a.y * b.y + a.z * b.z;
66 | }
67 |
68 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out)
69 | {
70 | d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z;
71 | d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z;
72 | }
73 |
74 | __device__ static inline vec3f reflect(vec3f x, vec3f n)
75 | {
76 | return n * 2.0f * dot(n, x) - x;
77 | }
78 |
79 | __device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out)
80 | {
81 | d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z);
82 | d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z);
83 | d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1);
84 |
85 | d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x);
86 | d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y);
87 | d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z));
88 | }
89 |
90 | __device__ static inline vec3f safeNormalize(vec3f v)
91 | {
92 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
93 | return l > 0.0f ? (v / l) : vec3f(0.0f);
94 | }
95 |
96 | __device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out)
97 | {
98 |
99 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
100 | if (l > 0.0f)
101 | {
102 | float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f);
103 | d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac;
104 | d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac;
105 | d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac;
106 | }
107 | }
108 |
109 | #endif
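
The hand-written bwdSafeNormalize above implements d_v = (|v|^2 * d_out - v (v . d_out)) / |v|^3; a sketch comparing that closed form against autograd's gradient of v/|v| (illustration only):

    import torch

    def bwd_safe_normalize(v, d_out):
        # Closed form from vec3f.h, written vectorially
        l2 = (v * v).sum()
        return (d_out * l2 - v * (v * d_out).sum()) / l2**1.5

    v = torch.tensor([0.3, -1.2, 0.8], requires_grad=True)
    d_out = torch.tensor([0.5, 0.1, -0.7])

    (torch.nn.functional.normalize(v, dim=0) * d_out).sum().backward()
    print(v.grad)                                  # autograd result
    print(bwd_safe_normalize(v.detach(), d_out))   # matches the CUDA formula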
--------------------------------------------------------------------------------
/render/renderutils/c_src/vec4f.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | *
4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
5 | * property and proprietary rights in and to this material, related
6 | * documentation and any modifications thereto. Any use, reproduction,
7 | * disclosure or distribution of this material and related documentation
8 | * without an express license agreement from NVIDIA CORPORATION or
9 | * its affiliates is strictly prohibited.
10 | */
11 |
12 | #pragma once
13 |
14 | struct vec4f
15 | {
16 | float x, y, z, w;
17 |
18 | #ifdef __CUDACC__
19 | __device__ vec4f() { }
20 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; }
21 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; }
22 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; }
23 | #endif
24 | };
25 |
26 |
--------------------------------------------------------------------------------
/render/renderutils/loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 |
12 | #----------------------------------------------------------------------------
13 | # HDR image losses
14 | #----------------------------------------------------------------------------
15 |
16 | def _tonemap_srgb(f, exposure=5):
17 | f = f * exposure
18 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)
19 |
20 | def _SMAPE(img, target, eps=0.01):
21 |     nom = torch.abs(img - target)
22 |     denom = torch.abs(img) + torch.abs(target) + eps
23 |     return torch.mean(nom / denom)
24 |
25 | def _RELMSE(img, target, eps=0.1):
26 |     nom = (img - target) * (img - target)
27 |     denom = img * img + target * target + eps
28 |     return torch.mean(nom / denom)
29 |
30 | def image_loss_fn(img, target, loss, tonemapper):
31 | if tonemapper == 'log_srgb':
32 | img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1))
33 | target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1))
34 |
35 | if loss == 'mse':
36 | return torch.nn.functional.mse_loss(img, target)
37 | elif loss == 'smape':
38 | return _SMAPE(img, target)
39 | elif loss == 'relmse':
40 | return _RELMSE(img, target)
41 | else:
42 | return torch.nn.functional.l1_loss(img, target)
43 |
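A small sketch of the python reference loss above (illustrative; the fused CUDA path is exposed as ru.image_loss in ops.py and exercised by the tests that follow):

    import torch
    # assuming image_loss_fn is in scope from the module above

    img = torch.rand(1, 8, 8, 3) * 10.0      # HDR-range values
    target = torch.rand(1, 8, 8, 3) * 10.0

    # 'log_srgb' tonemaps both images before comparison, compressing HDR range
    print(image_loss_fn(img, target, loss='smape', tonemapper='log_srgb').item())
    print(image_loss_fn(img, target, loss='l1', tonemapper='none').item())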
--------------------------------------------------------------------------------
/render/renderutils/tests/test_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 |
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 |
17 | RES = 8
18 | DTYPE = torch.float32
19 |
20 | def tonemap_srgb(f):
21 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)
22 |
23 | def l1(output, target):
24 | x = torch.clamp(output, min=0, max=65535)
25 | r = torch.clamp(target, min=0, max=65535)
26 | x = tonemap_srgb(torch.log(x + 1))
27 | r = tonemap_srgb(torch.log(r + 1))
28 | return torch.nn.functional.l1_loss(x,r)
29 |
30 | def relative_loss(name, ref, cuda):
31 | ref = ref.float()
32 | cuda = cuda.float()
33 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item())
34 |
35 | def test_loss(loss, tonemapper):
36 | img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
37 | img_ref = img_cuda.clone().detach().requires_grad_(True)
38 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
39 | target_ref = target_cuda.clone().detach().requires_grad_(True)
40 |
41 | ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True)
42 | ref_loss.backward()
43 |
44 | cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper)
45 | cuda_loss.backward()
46 |
47 | print("-------------------------------------------------------------")
48 | print(" Loss: %s, %s" % (loss, tonemapper))
49 | print("-------------------------------------------------------------")
50 |
51 | relative_loss("res:", ref_loss, cuda_loss)
52 | relative_loss("img:", img_ref.grad, img_cuda.grad)
53 | relative_loss("target:", target_ref.grad, target_cuda.grad)
54 |
55 |
56 | test_loss('l1', 'none')
57 | test_loss('l1', 'log_srgb')
58 | test_loss('mse', 'log_srgb')
59 | test_loss('smape', 'none')
60 | test_loss('relmse', 'none')
61 | test_loss('mse', 'none')
--------------------------------------------------------------------------------
/render/renderutils/tests/test_mesh.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 |
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 |
17 | BATCH = 8
18 | RES = 1024
19 | DTYPE = torch.float32
20 |
21 | torch.manual_seed(0)
22 |
23 | def tonemap_srgb(f):
24 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)
25 |
26 | def l1(output, target):
27 | x = torch.clamp(output, min=0, max=65535)
28 | r = torch.clamp(target, min=0, max=65535)
29 | x = tonemap_srgb(torch.log(x + 1))
30 | r = tonemap_srgb(torch.log(r + 1))
31 | return torch.nn.functional.l1_loss(x,r)
32 |
33 | def relative_loss(name, ref, cuda):
34 | ref = ref.float()
35 | cuda = cuda.float()
36 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref)).item())
37 |
38 | def test_xfm_points():
39 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
40 | points_ref = points_cuda.clone().detach().requires_grad_(True)
41 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
42 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
43 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)
44 |
45 | ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True)
46 | ref_loss = torch.nn.MSELoss()(ref_out, target)
47 | ref_loss.backward()
48 |
49 | cuda_out = ru.xfm_points(points_cuda, mtx_cuda)
50 | cuda_loss = torch.nn.MSELoss()(cuda_out, target)
51 | cuda_loss.backward()
52 |
53 | print("-------------------------------------------------------------")
54 |
55 | relative_loss("res:", ref_out, cuda_out)
56 | relative_loss("points:", points_ref.grad, points_cuda.grad)
57 |
58 | def test_xfm_vectors():
59 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
60 | points_ref = points_cuda.clone().detach().requires_grad_(True)
61 | points_cuda_p = points_cuda.clone().detach().requires_grad_(True)
62 | points_ref_p = points_cuda.clone().detach().requires_grad_(True)
63 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
64 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
65 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)
66 |
67 | ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True)
68 | ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3])
69 | ref_loss.backward()
70 |
71 | cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda)
72 | cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3])
73 | cuda_loss.backward()
74 |
75 | ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True)
76 | ref_loss_p = torch.nn.MSELoss()(ref_out_p, target)
77 | ref_loss_p.backward()
78 |
79 | cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda)
80 | cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target)
81 | cuda_loss_p.backward()
82 |
83 | print("-------------------------------------------------------------")
84 |
85 | relative_loss("res:", ref_out, cuda_out)
86 | relative_loss("points:", points_ref.grad, points_cuda.grad)
87 | relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad)
88 |
89 | test_xfm_points()
90 | test_xfm_vectors()
91 |
--------------------------------------------------------------------------------
/render/renderutils/tests/test_perf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
4 | # property and proprietary rights in and to this material, related
5 | # documentation and any modifications thereto. Any use, reproduction,
6 | # disclosure or distribution of this material and related documentation
7 | # without an express license agreement from NVIDIA CORPORATION or
8 | # its affiliates is strictly prohibited.
9 |
10 | import torch
11 |
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 |
17 | DTYPE=torch.float32
18 |
19 | def test_bsdf(BATCH, RES, ITR):
20 | kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
21 | kd_ref = kd_cuda.clone().detach().requires_grad_(True)
22 | arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
23 | arm_ref = arm_cuda.clone().detach().requires_grad_(True)
24 | pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
25 | pos_ref = pos_cuda.clone().detach().requires_grad_(True)
26 | nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
27 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True)
28 | view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
29 | view_ref = view_cuda.clone().detach().requires_grad_(True)
30 | light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
31 | light_ref = light_cuda.clone().detach().requires_grad_(True)
32 | target = torch.rand(BATCH, RES, RES, 3, device='cuda')
33 |
34 | start = torch.cuda.Event(enable_timing=True)
35 | end = torch.cuda.Event(enable_timing=True)
36 |
37 | ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) # warm-up call so extension load/compile is not timed
38 |
39 | print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES))
40 |
41 | start.record()
42 | for i in range(ITR):
43 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True)
44 | end.record()
45 | torch.cuda.synchronize()
46 | print("Pbr BSDF python:", start.elapsed_time(end))
47 |
48 | start.record()
49 | for i in range(ITR):
50 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)
51 | end.record()
52 | torch.cuda.synchronize()
53 | print("Pbr BSDF cuda:", start.elapsed_time(end))
54 |
55 | test_bsdf(1, 512, 1000)
56 | test_bsdf(16, 512, 1000)
57 | test_bsdf(1, 2048, 1000)
58 |
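The timing pattern above (one warm-up call, then a cuda.Event pair around the loop) generalizes to any GPU op. A compact helper, sketched here rather than taken from the repo:

    import torch

    def time_cuda(fn, iters=100):
        # Returns average milliseconds per call, measured with CUDA events.
        fn()  # warm-up: build kernels/caches outside the timed region
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        for _ in range(iters):
            fn()
        end.record()
        torch.cuda.synchronize()  # wait so elapsed_time sees both events
        return start.elapsed_time(end) / iters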
--------------------------------------------------------------------------------
/script/__pycache__/connet_face_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/connet_face_head.cpython-38.pyc
--------------------------------------------------------------------------------
/script/__pycache__/get_tet_smpl.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/get_tet_smpl.cpython-38.pyc
--------------------------------------------------------------------------------
/script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc
--------------------------------------------------------------------------------
/script/get_tet_smpl.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import pyvista as pv
4 | import pytetwild
5 |
6 | import numpy as np
7 | import tetgen
8 |
9 | def get_tet_mesh(mesh_path, save_npz_path):
10 |
11 | surface_mesh = pv.read(mesh_path)
12 | tet = tetgen.TetGen(surface_mesh)
13 | tet.make_manifold(verbose=True)  # repair the input surface to a manifold before tetrahedralizing
14 | tet_grid_volume = 6e-3  # upper bound on the volume of each output tetrahedron
15 | vertices, indices = tet.tetrahedralize(fixedvolume=1,
16 | maxvolume=tet_grid_volume,
17 | regionattrib=1,
18 | nobisect=False, steinerleft=-1, order=1, metric=1, meditview=1, nonodewritten=0, verbose=2)
19 | # shell = tet.grid.extract_surface()
20 | # tet_path = "tet_smpl2.obj"
21 | # vertices = vertices.to(np.float64)
22 | vertices = vertices.astype(np.float32)
23 | tet_path = save_npz_path.replace("npz", "obj")
24 | save_tet_mesh_as_obj(vertices, indices, tet_path)
25 | np.savez(save_npz_path, v=vertices, f=indices)
26 |
27 | return vertices, indices
28 |
29 |
30 | def get_tet_mesh_test(mesh_path, save_npz_path):
31 |
32 | surface_mesh = pv.read(mesh_path)
33 | tetrahedral_mesh = pytetwild.tetrahedralize_pv(surface_mesh, edge_length_fac=0.1)
34 | tetrahedral_mesh.explode(1).plot(show_edges=True)
35 |
36 | v = tetrahedral_mesh.points
37 | f = tetrahedral_mesh.cells.reshape(-1, 5)[:, 1:]
38 |
39 | np.savez(save_npz_path, v=v, f=f)
40 |
41 | return v, f
42 |
43 |
44 | def save_tet_mesh_as_obj(vertices, tetrahedra, filename):
45 | with open(filename, 'w') as f:
46 | for vertex in vertices:
47 | f.write(f"v {vertex[0]} {vertex[1]} {vertex[2]}\n")
48 |
49 | if tetrahedra is not None:
50 | for tet in tetrahedra:
51 | f.write(f"f {tet[0]+1} {tet[1]+1} {tet[2]+1} {tet[3]+1}\n")  # OBJ is 1-indexed; each tet is written as one 4-vertex face record
52 |
53 |
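A minimal driver for the helpers above; the mesh path and output name are placeholders, not files shipped with this repo, and pyvista/tetgen must be installed:

    if __name__ == "__main__":
        # Hypothetical paths, shown only to illustrate the call signature.
        verts, tets = get_tet_mesh("smpl_surface.obj", "tet_smpl.npz")
        print("tet vertices:", verts.shape)  # (V, 3), float32
        print("tetrahedra:", tets.shape)     # (T, 4) vertex indices

        # The .npz can be reloaded later without re-running TetGen.
        data = np.load("tet_smpl.npz")
        assert data["v"].shape == verts.shape
        assert data["f"].shape == tets.shape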
--------------------------------------------------------------------------------
/ssim_loss.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2023, Inria
3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco
4 | # All rights reserved.
5 | #
6 | # This software is free for non-commercial, research and evaluation use
7 | # under the terms of the LICENSE.md file.
8 | #
9 | # For inquiries contact george.drettakis@inria.fr
10 | #
11 |
12 | import torch
13 | import torch.nn.functional as F
14 | from torch.autograd import Variable
15 | from math import exp
16 |
17 | def l1_loss(network_output, gt):
18 | return torch.abs((network_output - gt)).mean()
19 |
20 | def l2_loss(network_output, gt):
21 | return ((network_output - gt) ** 2).mean()
22 |
23 | def gaussian(window_size, sigma):
24 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
25 | return gauss / gauss.sum()
26 |
27 | def create_window(window_size, channel):
28 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
29 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
30 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
31 | return window
32 |
33 | def ssim(img1, img2, window_size=11, size_average=True):
34 | channel = img1.size(-3)
35 | window = create_window(window_size, channel)
36 |
37 | if img1.is_cuda:
38 | window = window.cuda(img1.get_device())
39 | window = window.type_as(img1)
40 |
41 | return _ssim(img1, img2, window, window_size, channel, size_average)
42 |
43 | def _ssim(img1, img2, window, window_size, channel, size_average=True):
44 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
45 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
46 |
47 | mu1_sq = mu1.pow(2)
48 | mu2_sq = mu2.pow(2)
49 | mu1_mu2 = mu1 * mu2
50 |
51 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
52 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
53 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
54 |
55 | C1 = 0.01 ** 2
56 | C2 = 0.03 ** 2
57 |
58 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
59 |
60 | if size_average:
61 | return ssim_map.mean()
62 | else:
63 | return ssim_map.mean(1).mean(1).mean(1)
64 |
65 |
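SSIM is a similarity score (1 means identical), so training code typically minimizes "1 - ssim", often blended with the L1 term defined above. A short sketch; the shapes and blend weight are illustrative, not values from this repo's configs:

    import torch

    pred = torch.rand(1, 3, 256, 256)  # rendered image, (N, C, H, W)
    gt = torch.rand(1, 3, 256, 256)    # ground-truth image

    lambda_dssim = 0.2  # illustrative blend weight
    loss = (1.0 - lambda_dssim) * l1_loss(pred, gt) \
         + lambda_dssim * (1.0 - ssim(pred, gt))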
--------------------------------------------------------------------------------
/third_parties/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__init__.py
--------------------------------------------------------------------------------
/third_parties/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/third_parties/lpips/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import torch
8 | # from torch.autograd import Variable
9 |
10 | from lpips.trainer import *
11 | from lpips.lpips import *
12 |
13 | def normalize_tensor(in_feat,eps=1e-10):
14 | norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True))
15 | return in_feat/(norm_factor+eps)
16 |
17 | def l2(p0, p1, range=255.):
18 | return .5*np.mean((p0 / range - p1 / range)**2)
19 |
20 | def psnr(p0, p1, peak=255.):
21 | return 10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2))
22 |
23 | def dssim(p0, p1, range=255.):
24 | from skimage.metrics import structural_similarity as compare_ssim
25 | return (1 - compare_ssim(p0, p1, data_range=range, channel_axis=-1)) / 2.
26 |
27 | def rgb2lab(in_img,mean_cent=False):
28 | from skimage import color
29 | img_lab = color.rgb2lab(in_img)
30 | if(mean_cent):
31 | img_lab[:,:,0] = img_lab[:,:,0]-50
32 | return img_lab
33 |
34 | def tensor2np(tensor_obj):
35 | # change dimension of a tensor object into a numpy array
36 | return tensor_obj[0].cpu().float().numpy().transpose((1,2,0))
37 |
38 | def np2tensor(np_obj):
39 | # change dimension of np array into tensor array
40 | return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
41 |
42 | def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False):
43 | # image tensor to lab tensor
44 | from skimage import color
45 |
46 | img = tensor2im(image_tensor)
47 | img_lab = color.rgb2lab(img)
48 | if(mc_only):
49 | img_lab[:,:,0] = img_lab[:,:,0]-50
50 | if(to_norm and not mc_only):
51 | img_lab[:,:,0] = img_lab[:,:,0]-50
52 | img_lab = img_lab/100.
53 |
54 | return np2tensor(img_lab)
55 |
56 | def tensorlab2tensor(lab_tensor,return_inbnd=False):
57 | from skimage import color
58 | import warnings
59 | warnings.filterwarnings("ignore")
60 |
61 | lab = tensor2np(lab_tensor)*100.
62 | lab[:,:,0] = lab[:,:,0]+50
63 |
64 | rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1)
65 | if(return_inbnd):
66 | # convert back to lab, see if we match
67 | lab_back = color.rgb2lab(rgb_back.astype('uint8'))
68 | mask = 1.*np.isclose(lab_back,lab,atol=2.)
69 | mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis])
70 | return (im2tensor(rgb_back),mask)
71 | else:
72 | return im2tensor(rgb_back)
73 |
74 | def load_image(path):
75 | if(path[-3:] == 'dng'):
76 | import rawpy
77 | with rawpy.imread(path) as raw:
78 | img = raw.postprocess()
79 | elif(path[-3:]=='bmp' or path[-3:]=='jpg' or path[-3:]=='png' or path[-4:]=='jpeg'):
80 | import cv2
81 | return cv2.imread(path)[:,:,::-1]
82 | else:
83 | import matplotlib.pyplot as plt; img = (255*plt.imread(path)[:,:,:3]).astype('uint8')
84 |
85 | return img
86 |
87 | def rgb2lab(input):
88 | from skimage import color
89 | return color.rgb2lab(input / 255.)
90 |
91 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
92 | image_numpy = image_tensor[0].cpu().float().numpy()
93 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
94 | return image_numpy.astype(imtype)
95 |
96 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
97 | return torch.Tensor((image / factor - cent)
98 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
99 |
100 | def tensor2vec(vector_tensor):
101 | return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
102 |
103 |
104 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
105 | # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
106 | image_numpy = image_tensor[0].cpu().float().numpy()
107 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
108 | return image_numpy.astype(imtype)
109 |
110 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
111 | # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
112 | return torch.Tensor((image / factor - cent)
113 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
114 |
115 |
116 |
117 | def voc_ap(rec, prec, use_07_metric=False):
118 | """ ap = voc_ap(rec, prec, [use_07_metric])
119 | Compute VOC AP given precision and recall.
120 | If use_07_metric is true, uses the
121 | VOC 07 11 point method (default:False).
122 | """
123 | if use_07_metric:
124 | # 11 point metric
125 | ap = 0.
126 | for t in np.arange(0., 1.1, 0.1):
127 | if np.sum(rec >= t) == 0:
128 | p = 0
129 | else:
130 | p = np.max(prec[rec >= t])
131 | ap = ap + p / 11.
132 | else:
133 | # correct AP calculation
134 | # first append sentinel values at the end
135 | mrec = np.concatenate(([0.], rec, [1.]))
136 | mpre = np.concatenate(([0.], prec, [0.]))
137 |
138 | # compute the precision envelope
139 | for i in range(mpre.size - 1, 0, -1):
140 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
141 |
142 | # to calculate area under PR curve, look for points
143 | # where X axis (recall) changes value
144 | i = np.where(mrec[1:] != mrec[:-1])[0]
145 |
146 | # and sum (\Delta recall) * prec
147 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
148 | return ap
149 |
150 |
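A quick sanity check of voc_ap on a toy precision/recall curve (values invented for illustration); both variants return a scalar in [0, 1]:

    import numpy as np

    rec = np.array([0.1, 0.3, 0.5, 0.7, 0.9])   # monotone recall
    prec = np.array([1.0, 0.9, 0.7, 0.5, 0.4])  # decaying precision

    ap_07 = voc_ap(rec, prec, use_07_metric=True)    # 11-point interpolation
    ap_all = voc_ap(rec, prec, use_07_metric=False)  # area under the PR envelope
    print(f"AP (VOC07): {ap_07:.3f}, AP (all-point): {ap_all:.3f}")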
--------------------------------------------------------------------------------
/third_parties/lpips/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/third_parties/lpips/pretrained_networks.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | import torch
3 | from torchvision import models as tv
4 |
5 | class squeezenet(torch.nn.Module):
6 | def __init__(self, requires_grad=False, pretrained=True):
7 | super(squeezenet, self).__init__()
8 | pretrained_features = tv.squeezenet1_1(weights=tv.SqueezeNet1_1_Weights.DEFAULT if pretrained else None).features
9 | self.slice1 = torch.nn.Sequential()
10 | self.slice2 = torch.nn.Sequential()
11 | self.slice3 = torch.nn.Sequential()
12 | self.slice4 = torch.nn.Sequential()
13 | self.slice5 = torch.nn.Sequential()
14 | self.slice6 = torch.nn.Sequential()
15 | self.slice7 = torch.nn.Sequential()
16 | self.N_slices = 7
17 | for x in range(2):
18 | self.slice1.add_module(str(x), pretrained_features[x])
19 | for x in range(2,5):
20 | self.slice2.add_module(str(x), pretrained_features[x])
21 | for x in range(5, 8):
22 | self.slice3.add_module(str(x), pretrained_features[x])
23 | for x in range(8, 10):
24 | self.slice4.add_module(str(x), pretrained_features[x])
25 | for x in range(10, 11):
26 | self.slice5.add_module(str(x), pretrained_features[x])
27 | for x in range(11, 12):
28 | self.slice6.add_module(str(x), pretrained_features[x])
29 | for x in range(12, 13):
30 | self.slice7.add_module(str(x), pretrained_features[x])
31 | if not requires_grad:
32 | for param in self.parameters():
33 | param.requires_grad = False
34 |
35 | def forward(self, X):
36 | h = self.slice1(X)
37 | h_relu1 = h
38 | h = self.slice2(h)
39 | h_relu2 = h
40 | h = self.slice3(h)
41 | h_relu3 = h
42 | h = self.slice4(h)
43 | h_relu4 = h
44 | h = self.slice5(h)
45 | h_relu5 = h
46 | h = self.slice6(h)
47 | h_relu6 = h
48 | h = self.slice7(h)
49 | h_relu7 = h
50 | vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7'])
51 | out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7)
52 |
53 | return out
54 |
55 |
56 | class alexnet(torch.nn.Module):
57 | def __init__(self, requires_grad=False, pretrained=True):
58 | super(alexnet, self).__init__()
59 | weights = tv.AlexNet_Weights.DEFAULT if pretrained else None
60 | alexnet_pretrained_features = tv.alexnet(weights=weights).features
61 | self.slice1 = torch.nn.Sequential()
62 | self.slice2 = torch.nn.Sequential()
63 | self.slice3 = torch.nn.Sequential()
64 | self.slice4 = torch.nn.Sequential()
65 | self.slice5 = torch.nn.Sequential()
66 | self.N_slices = 5
67 | for x in range(2):
68 | self.slice1.add_module(str(x), alexnet_pretrained_features[x])
69 | for x in range(2, 5):
70 | self.slice2.add_module(str(x), alexnet_pretrained_features[x])
71 | for x in range(5, 8):
72 | self.slice3.add_module(str(x), alexnet_pretrained_features[x])
73 | for x in range(8, 10):
74 | self.slice4.add_module(str(x), alexnet_pretrained_features[x])
75 | for x in range(10, 12):
76 | self.slice5.add_module(str(x), alexnet_pretrained_features[x])
77 | if not requires_grad:
78 | for param in self.parameters():
79 | param.requires_grad = False
80 |
81 | def forward(self, X):
82 | h = self.slice1(X)
83 | h_relu1 = h
84 | h = self.slice2(h)
85 | h_relu2 = h
86 | h = self.slice3(h)
87 | h_relu3 = h
88 | h = self.slice4(h)
89 | h_relu4 = h
90 | h = self.slice5(h)
91 | h_relu5 = h
92 | alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
93 | out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
94 |
95 | return out
96 |
97 | class vgg16(torch.nn.Module):
98 | def __init__(self, requires_grad=False, pretrained=True):
99 | super(vgg16, self).__init__()
100 | weights = tv.VGG16_Weights.DEFAULT if pretrained else None
101 | vgg_pretrained_features = tv.vgg16(weights=weights).features
102 | self.slice1 = torch.nn.Sequential()
103 | self.slice2 = torch.nn.Sequential()
104 | self.slice3 = torch.nn.Sequential()
105 | self.slice4 = torch.nn.Sequential()
106 | self.slice5 = torch.nn.Sequential()
107 | self.N_slices = 5
108 | for x in range(4):
109 | self.slice1.add_module(str(x), vgg_pretrained_features[x])
110 | for x in range(4, 9):
111 | self.slice2.add_module(str(x), vgg_pretrained_features[x])
112 | for x in range(9, 16):
113 | self.slice3.add_module(str(x), vgg_pretrained_features[x])
114 | for x in range(16, 23):
115 | self.slice4.add_module(str(x), vgg_pretrained_features[x])
116 | for x in range(23, 30):
117 | self.slice5.add_module(str(x), vgg_pretrained_features[x])
118 | if not requires_grad:
119 | for param in self.parameters():
120 | param.requires_grad = False
121 |
122 | def forward(self, X):
123 | h = self.slice1(X)
124 | h_relu1_2 = h
125 | h = self.slice2(h)
126 | h_relu2_2 = h
127 | h = self.slice3(h)
128 | h_relu3_3 = h
129 | h = self.slice4(h)
130 | h_relu4_3 = h
131 | h = self.slice5(h)
132 | h_relu5_3 = h
133 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
134 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
135 |
136 | return out
137 |
138 |
139 |
140 | class resnet(torch.nn.Module):
141 | def __init__(self, requires_grad=False, pretrained=True, num=18):
142 | super(resnet, self).__init__()
143 | if(num==18):
144 | self.net = tv.resnet18(weights=tv.ResNet18_Weights.DEFAULT if pretrained else None)
145 | elif(num==34):
146 | self.net = tv.resnet34(weights=tv.ResNet34_Weights.DEFAULT if pretrained else None)
147 | elif(num==50):
148 | self.net = tv.resnet50(weights=tv.ResNet50_Weights.DEFAULT if pretrained else None)
149 | elif(num==101):
150 | self.net = tv.resnet101(weights=tv.ResNet101_Weights.DEFAULT if pretrained else None)
151 | elif(num==152):
152 | self.net = tv.resnet152(weights=tv.ResNet152_Weights.DEFAULT if pretrained else None)
153 | self.N_slices = 5
154 |
155 | self.conv1 = self.net.conv1
156 | self.bn1 = self.net.bn1
157 | self.relu = self.net.relu
158 | self.maxpool = self.net.maxpool
159 | self.layer1 = self.net.layer1
160 | self.layer2 = self.net.layer2
161 | self.layer3 = self.net.layer3
162 | self.layer4 = self.net.layer4
163 |
164 | def forward(self, X):
165 | h = self.conv1(X)
166 | h = self.bn1(h)
167 | h = self.relu(h)
168 | h_relu1 = h
169 | h = self.maxpool(h)
170 | h = self.layer1(h)
171 | h_conv2 = h
172 | h = self.layer2(h)
173 | h_conv3 = h
174 | h = self.layer3(h)
175 | h_conv4 = h
176 | h = self.layer4(h)
177 | h_conv5 = h
178 |
179 | outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5'])
180 | out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
181 |
182 | return out
183 |
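To make the slice structure concrete, here is a sketch of how LPIPS-style code walks the returned namedtuple to compare two images layer by layer. The unit normalization mirrors normalize_tensor from the package __init__; the unweighted average is a simplification (real LPIPS applies learned per-slice linear weights), and the input sizes are illustrative:

    import torch

    net = vgg16(requires_grad=False, pretrained=False).eval()  # pretrained=False keeps the sketch offline

    x0 = torch.rand(1, 3, 64, 64)
    x1 = torch.rand(1, 3, 64, 64)

    feats0, feats1 = net(x0), net(x1)
    dist = 0.0
    for f0, f1 in zip(feats0, feats1):
        n0 = f0 / (f0.pow(2).sum(1, keepdim=True).sqrt() + 1e-10)  # unit-normalize channels
        n1 = f1 / (f1.pow(2).sum(1, keepdim=True).sqrt() + 1e-10)
        dist = dist + (n0 - n1).pow(2).mean()
    print("unweighted perceptual distance:", float(dist))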
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.0/alex.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/alex.pth
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.0/squeeze.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/squeeze.pth
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.0/vgg.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/vgg.pth
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.1/alex.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/alex.pth
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.1/squeeze.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/squeeze.pth
--------------------------------------------------------------------------------
/third_parties/lpips/weights/v0.1/vgg.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/vgg.pth
--------------------------------------------------------------------------------
/third_parties/pytorch3d/__init__.py:
--------------------------------------------------------------------------------
1 | from . import ops
2 |
--------------------------------------------------------------------------------
/third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc
--------------------------------------------------------------------------------
/third_parties/pytorch3d/cuda/knn.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Meta Platforms, Inc. and affiliates.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree.
7 | */
8 |
9 | #pragma once
10 | #include <torch/extension.h>
11 | #include <tuple>
12 | #include "utils/pytorch3d_cutils.h"
13 | #define WITH_CUDA true
14 |
15 | // Compute indices of K nearest neighbors in pointcloud p2 to points
16 | // in pointcloud p1.
17 | //
18 | // Args:
19 | // p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each
20 | // containing P1 points of dimension D.
21 | // p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each
22 | // containing P2 points of dimension D.
23 | // lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud.
24 | // lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud.
25 | // norm: int specifying the norm for the distance (1 for L1, 2 for L2)
26 | // K: int giving the number of nearest points to return.
27 | // version: Integer telling which implementation to use.
28 | //
29 | // Returns:
30 | // p1_neighbor_idx: LongTensor of shape (N, P1, K), where
31 | // p1_neighbor_idx[n, i, k] = j means that the kth nearest
32 | // neighbor to p1[n, i] in the cloud p2[n] is p2[n, j].
33 | // It is padded with zeros so that it can be used easily in a later
34 | // gather() operation.
35 | //
36 | // p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared
37 | // distance from each point p1[n, p, :] to its K neighbors
38 | // p2[n, p1_neighbor_idx[n, p, k], :].
39 |
40 | // CPU implementation.
41 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu(
42 | const at::Tensor& p1,
43 | const at::Tensor& p2,
44 | const at::Tensor& lengths1,
45 | const at::Tensor& lengths2,
46 | const int norm,
47 | const int K);
48 |
49 | // CUDA implementation
50 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
51 | const at::Tensor& p1,
52 | const at::Tensor& p2,
53 | const at::Tensor& lengths1,
54 | const at::Tensor& lengths2,
55 | const int norm,
56 | const int K,
57 | const int version);
58 |
59 | // Implementation which is exposed.
60 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
61 | const at::Tensor& p1,
62 | const at::Tensor& p2,
63 | const at::Tensor& lengths1,
64 | const at::Tensor& lengths2,
65 | const int norm,
66 | const int K,
67 | const int version) {
68 | if (p1.is_cuda() || p2.is_cuda()) {
69 | #ifdef WITH_CUDA
70 | CHECK_CUDA(p1);
71 | CHECK_CUDA(p2);
72 | return KNearestNeighborIdxCuda(
73 | p1, p2, lengths1, lengths2, norm, K, version);
74 | #else
75 | AT_ERROR("Not compiled with GPU support.");
76 | #endif
77 | }
78 | return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
79 | }
80 |
81 | // Compute gradients with respect to p1 and p2
82 | //
83 | // Args:
84 | // p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each
85 | // containing P1 points of dimension D.
86 | // p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each
87 | // containing P2 points of dimension D.
88 | // lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud.
89 | // lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud.
90 | // p1_neighbor_idx: LongTensor of shape (N, P1, K), where
91 | // p1_neighbor_idx[n, i, k] = j means that the kth nearest
92 | // neighbor to p1[n, i] in the cloud p2[n] is p2[n, j].
93 | // It is padded with zeros so that it can be used easily in a later
94 | // gather() operation. This is computed from the forward pass.
95 | // norm: int specifying the norm for the distance (1 for L1, 2 for L2)
96 | // grad_dists: FloatTensor of shape (N, P1, K) which contains the input
97 | // gradients.
98 | //
99 | // Returns:
100 | // grad_p1: FloatTensor of shape (N, P1, D) containing the output gradients
101 | // wrt p1.
102 | // grad_p2: FloatTensor of shape (N, P2, D) containing the output gradients
103 | // wrt p2.
104 |
105 | // CPU implementation.
106 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu(
107 | const at::Tensor& p1,
108 | const at::Tensor& p2,
109 | const at::Tensor& lengths1,
110 | const at::Tensor& lengths2,
111 | const at::Tensor& idxs,
112 | const int norm,
113 | const at::Tensor& grad_dists);
114 |
115 | // CUDA implementation
116 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
117 | const at::Tensor& p1,
118 | const at::Tensor& p2,
119 | const at::Tensor& lengths1,
120 | const at::Tensor& lengths2,
121 | const at::Tensor& idxs,
122 | const int norm,
123 | const at::Tensor& grad_dists);
124 |
125 | // Implementation which is exposed.
126 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
127 | const at::Tensor& p1,
128 | const at::Tensor& p2,
129 | const at::Tensor& lengths1,
130 | const at::Tensor& lengths2,
131 | const at::Tensor& idxs,
132 | const int norm,
133 | const at::Tensor& grad_dists) {
134 | if (p1.is_cuda() || p2.is_cuda()) {
135 | #ifdef WITH_CUDA
136 | CHECK_CUDA(p1);
137 | CHECK_CUDA(p2);
138 | return KNearestNeighborBackwardCuda(
139 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
140 | #else
141 | AT_ERROR("Not compiled with GPU support.");
142 | #endif
143 | }
144 | return KNearestNeighborBackwardCpu(
145 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
146 | }
147 |
148 | // Utility to check whether a KNN version can be used.
149 | //
150 | // Args:
151 | // version: Integer in the range 0 <= version <= 3 indicating one of our
152 | // KNN implementations.
153 | // D: Number of dimensions for the input and query point clouds
154 | // K: Number of neighbors to be found
155 | //
156 | // Returns:
157 | // Whether the indicated KNN version can be used.
158 | bool KnnCheckVersion(int version, const int64_t D, const int64_t K);
159 |
160 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
161 | m.def("knn_points_idx", &KNearestNeighborIdx);
162 | m.def("knn_points_backward", &KNearestNeighborBackward);
163 | }
164 |
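For orientation, this is roughly how the two bound functions are reached from Python once this translation unit is compiled. Here knn_ext is a placeholder for however the extension is loaded (e.g. torch.utils.cpp_extension.load); the bound names come from the PYBIND11_MODULE block above, and the shapes follow its doc comment. In upstream PyTorch3D, version=-1 lets the CUDA dispatcher pick an implementation:

    import torch

    N, P1, P2, D, K = 2, 128, 256, 3, 8
    p1 = torch.rand(N, P1, D, device="cuda")
    p2 = torch.rand(N, P2, D, device="cuda")
    lengths1 = torch.full((N,), P1, dtype=torch.int64, device="cuda")
    lengths2 = torch.full((N,), P2, dtype=torch.int64, device="cuda")

    # idx: (N, P1, K) indices into p2; dists: (N, P1, K) squared L2 distances.
    idx, dists = knn_ext.knn_points_idx(p1, p2, lengths1, lengths2, 2, K, -1)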
--------------------------------------------------------------------------------
/third_parties/pytorch3d/cuda/knn_cpu.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Meta Platforms, Inc. and affiliates.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree.
7 | */
8 |
9 | #include <torch/extension.h>
10 | #include <queue>
11 | #include <tuple>
12 |
13 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu(
14 | const at::Tensor& p1,
15 | const at::Tensor& p2,
16 | const at::Tensor& lengths1,
17 | const at::Tensor& lengths2,
18 | const int norm,
19 | const int K) {
20 | const int N = p1.size(0);
21 | const int P1 = p1.size(1);
22 | const int D = p1.size(2);
23 |
24 | auto long_opts = lengths1.options().dtype(torch::kInt64);
25 | torch::Tensor idxs = torch::full({N, P1, K}, 0, long_opts);
26 | torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options());
27 |
28 | auto p1_a = p1.accessor<float, 3>();
29 | auto p2_a = p2.accessor<float, 3>();
30 | auto lengths1_a = lengths1.accessor<int64_t, 1>();
31 | auto lengths2_a = lengths2.accessor<int64_t, 1>();
32 | auto idxs_a = idxs.accessor<int64_t, 3>();
33 | auto dists_a = dists.accessor<float, 3>();
34 |
35 | for (int n = 0; n < N; ++n) {
36 | const int64_t length1 = lengths1_a[n];
37 | const int64_t length2 = lengths2_a[n];
38 | for (int64_t i1 = 0; i1 < length1; ++i1) {
39 | // Use a priority queue to store (distance, index) tuples.
40 | std::priority_queue<std::tuple<float, int64_t>> q;
41 | for (int64_t i2 = 0; i2 < length2; ++i2) {
42 | float dist = 0;
43 | for (int d = 0; d < D; ++d) {
44 | float diff = p1_a[n][i1][d] - p2_a[n][i2][d];
45 | if (norm == 1) {
46 | dist += abs(diff);
47 | } else { // norm is 2 (default)
48 | dist += diff * diff;
49 | }
50 | }
51 | int size = static_cast<int>(q.size());
52 | if (size < K || dist < std::get<0>(q.top())) {
53 | q.emplace(dist, i2);
54 | if (size >= K) {
55 | q.pop();
56 | }
57 | }
58 | }
59 | while (!q.empty()) {
60 | auto t = q.top();
61 | q.pop();
62 | const int k = q.size();
63 | dists_a[n][i1][k] = std::get<0>(t);
64 | idxs_a[n][i1][k] = std::get<1>(t);
65 | }
66 | }
67 | }
68 | return std::make_tuple(idxs, dists);
69 | }
70 |
71 | // ------------------------------------------------------------- //
72 | // Backward Operators //
73 | // ------------------------------------------------------------- //
74 |
75 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu(
76 | const at::Tensor& p1,
77 | const at::Tensor& p2,
78 | const at::Tensor& lengths1,
79 | const at::Tensor& lengths2,
80 | const at::Tensor& idxs,
81 | const int norm,
82 | const at::Tensor& grad_dists) {
83 | const int N = p1.size(0);
84 | const int P1 = p1.size(1);
85 | const int D = p1.size(2);
86 | const int P2 = p2.size(1);
87 | const int K = idxs.size(2);
88 |
89 | torch::Tensor grad_p1 = torch::full({N, P1, D}, 0, p1.options());
90 | torch::Tensor grad_p2 = torch::full({N, P2, D}, 0, p2.options());
91 |
92 | auto p1_a = p1.accessor<float, 3>();
93 | auto p2_a = p2.accessor<float, 3>();
94 | auto lengths1_a = lengths1.accessor<int64_t, 1>();
95 | auto lengths2_a = lengths2.accessor<int64_t, 1>();
96 | auto idxs_a = idxs.accessor<int64_t, 3>();
97 | auto grad_dists_a = grad_dists.accessor<float, 3>();
98 | auto grad_p1_a = grad_p1.accessor<float, 3>();
99 | auto grad_p2_a = grad_p2.accessor<float, 3>();
100 |
101 | for (int n = 0; n < N; ++n) {
102 | const int64_t length1 = lengths1_a[n];
103 | int64_t length2 = lengths2_a[n];
104 | length2 = (length2 < K) ? length2 : K;
105 | for (int64_t i1 = 0; i1 < length1; ++i1) {
106 | for (int64_t k = 0; k < length2; ++k) {
107 | const int64_t i2 = idxs_a[n][i1][k];
108 | // If the index is the pad value of -1 then ignore it
109 | if (i2 == -1) {
110 | continue;
111 | }
112 | for (int64_t d = 0; d < D; ++d) {
113 | float diff = 0.0;
114 | if (norm == 1) {
115 | float sign = (p1_a[n][i1][d] > p2_a[n][i2][d]) ? 1.0 : -1.0;
116 | diff = grad_dists_a[n][i1][k] * sign;
117 | } else { // norm is 2 (default)
118 | diff = 2.0f * grad_dists_a[n][i1][k] *
119 | (p1_a[n][i1][d] - p2_a[n][i2][d]);
120 | }
121 | grad_p1_a[n][i1][d] += diff;
122 | grad_p2_a[n][i2][d] += -1.0f * diff;
123 | }
124 | }
125 | }
126 | }
127 | return std::make_tuple(grad_p1, grad_p2);
128 | }
129 |
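The queue-based loop above is a batched brute-force KNN; a dense PyTorch equivalent (ignoring the variable-length handling) can serve as a reference when validating the extension. A sketch, not code from this repo:

    import torch

    def knn_bruteforce(p1, p2, K, norm=2):
        # p1: (N, P1, D), p2: (N, P2, D) -> idxs, dists of shape (N, P1, K)
        diff = p1[:, :, None, :] - p2[:, None, :, :]
        d = diff.abs().sum(-1) if norm == 1 else diff.pow(2).sum(-1)
        # largest=False returns the K smallest, ascending, matching the queue drain.
        dists, idxs = d.topk(K, dim=2, largest=False)
        return idxs, dists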
--------------------------------------------------------------------------------
/third_parties/pytorch3d/cuda/utils/index_utils.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Meta Platforms, Inc. and affiliates.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree.
7 | */
8 |
9 | // This converts dynamic array lookups into static array lookups, for small
10 | // arrays up to size 32.
11 | //
12 | // Suppose we have a small thread-local array:
13 | //
14 | // float vals[10];
15 | //
16 | // Ideally we should only index this array using static indices:
17 | //
18 | // for (int i = 0; i < 10; ++i) vals[i] = i * i;
19 | //
20 | // If we do so, then the CUDA compiler may be able to place the array into
21 | // registers, which can have a big performance improvement. However if we
22 | // access the array dynamically, the compiler may force the array into
23 | // local memory, which has the same latency as global memory.
24 | //
25 | // These functions convert dynamic array access into static array access
26 | // using a brute-force lookup table. It can be used like this:
27 | //
28 | // float vals[10];
29 | // int idx = 3;
30 | // float val = 3.14f;
31 | // RegisterIndexUtils<float, 10>::set(vals, idx, val);
32 | // float val2 = RegisterIndexUtils<float, 10>::get(vals, idx);
33 | //
34 | // The implementation is based on fbcuda/RegisterUtils.cuh:
35 | // https://github.com/facebook/fbcuda/blob/master/RegisterUtils.cuh
36 | // To avoid depending on the entire library, we just reimplement these two
37 | // functions. The fbcuda implementation is a bit more sophisticated, and uses
38 | // the preprocessor to generate switch statements that go up to N for each
39 | // value of N. We are lazy and just have a giant explicit switch statement.
40 | //
41 | // We might be able to use a template metaprogramming approach similar to
42 | // DispatchKernel1D for this. However DispatchKernel1D is intended to be used
43 | // for dispatching to the correct CUDA kernel on the host, while this is
44 | // intended to run on the device. I was concerned that a metaprogramming
45 | // approach for this might lead to extra function calls at runtime if the
46 | // compiler fails to optimize them away, which could be very slow on device.
47 | // However I didn't actually benchmark or test this.
48 | template <typename T, int N>
49 | struct RegisterIndexUtils {
50 | __device__ __forceinline__ static T get(const T arr[N], int idx) {
51 | if (idx < 0 || idx >= N)
52 | return T();
53 | switch (idx) {
54 | case 0:
55 | return arr[0];
56 | case 1:
57 | return arr[1];
58 | case 2:
59 | return arr[2];
60 | case 3:
61 | return arr[3];
62 | case 4:
63 | return arr[4];
64 | case 5:
65 | return arr[5];
66 | case 6:
67 | return arr[6];
68 | case 7:
69 | return arr[7];
70 | case 8:
71 | return arr[8];
72 | case 9:
73 | return arr[9];
74 | case 10:
75 | return arr[10];
76 | case 11:
77 | return arr[11];
78 | case 12:
79 | return arr[12];
80 | case 13:
81 | return arr[13];
82 | case 14:
83 | return arr[14];
84 | case 15:
85 | return arr[15];
86 | case 16:
87 | return arr[16];
88 | case 17:
89 | return arr[17];
90 | case 18:
91 | return arr[18];
92 | case 19:
93 | return arr[19];
94 | case 20:
95 | return arr[20];
96 | case 21:
97 | return arr[21];
98 | case 22:
99 | return arr[22];
100 | case 23:
101 | return arr[23];
102 | case 24:
103 | return arr[24];
104 | case 25:
105 | return arr[25];
106 | case 26:
107 | return arr[26];
108 | case 27:
109 | return arr[27];
110 | case 28:
111 | return arr[28];
112 | case 29:
113 | return arr[29];
114 | case 30:
115 | return arr[30];
116 | case 31:
117 | return arr[31];
118 | };
119 | return T();
120 | }
121 |
122 | __device__ __forceinline__ static void set(T arr[N], int idx, T val) {
123 | if (idx < 0 || idx >= N)
124 | return;
125 | switch (idx) {
126 | case 0:
127 | arr[0] = val;
128 | break;
129 | case 1:
130 | arr[1] = val;
131 | break;
132 | case 2:
133 | arr[2] = val;
134 | break;
135 | case 3:
136 | arr[3] = val;
137 | break;
138 | case 4:
139 | arr[4] = val;
140 | break;
141 | case 5:
142 | arr[5] = val;
143 | break;
144 | case 6:
145 | arr[6] = val;
146 | break;
147 | case 7:
148 | arr[7] = val;
149 | break;
150 | case 8:
151 | arr[8] = val;
152 | break;
153 | case 9:
154 | arr[9] = val;
155 | break;
156 | case 10:
157 | arr[10] = val;
158 | break;
159 | case 11:
160 | arr[11] = val;
161 | break;
162 | case 12:
163 | arr[12] = val;
164 | break;
165 | case 13:
166 | arr[13] = val;
167 | break;
168 | case 14:
169 | arr[14] = val;
170 | break;
171 | case 15:
172 | arr[15] = val;
173 | break;
174 | case 16:
175 | arr[16] = val;
176 | break;
177 | case 17:
178 | arr[17] = val;
179 | break;
180 | case 18:
181 | arr[18] = val;
182 | break;
183 | case 19:
184 | arr[19] = val;
185 | break;
186 | case 20:
187 | arr[20] = val;
188 | break;
189 | case 21:
190 | arr[21] = val;
191 | break;
192 | case 22:
193 | arr[22] = val;
194 | break;
195 | case 23:
196 | arr[23] = val;
197 | break;
198 | case 24:
199 | arr[24] = val;
200 | break;
201 | case 25:
202 | arr[25] = val;
203 | break;
204 | case 26:
205 | arr[26] = val;
206 | break;
207 | case 27:
208 | arr[27] = val;
209 | break;
210 | case 28:
211 | arr[28] = val;
212 | break;
213 | case 29:
214 | arr[29] = val;
215 | break;
216 | case 30:
217 | arr[30] = val;
218 | break;
219 | case 31:
220 | arr[31] = val;
221 | break;
222 | }
223 | }
224 | };
225 |
--------------------------------------------------------------------------------
/third_parties/pytorch3d/cuda/utils/mink.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Meta Platforms, Inc. and affiliates.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree.
7 | */
8 |
9 | #pragma once
10 | #define MINK_H
11 |
12 | #include "index_utils.cuh"
13 |
14 | // A data structure to keep track of the smallest K keys seen so far as well
15 | // as their associated values, intended to be used in device code.
16 | // This data structure doesn't allocate any memory; keys and values are stored
17 | // in arrays passed to the constructor.
18 | //
19 | // The implementation is generic; it can be used for any key type that supports
20 | // the < operator, and can be used with any value type.
21 | //
22 | // Example usage:
23 | //
24 | // float keys[K];
25 | // int values[K];
26 | // MinK<float, int> mink(keys, values, K);
27 | // for (...) {
28 | // // Produce some key and value from somewhere
29 | // mink.add(key, value);
30 | // }
31 | // mink.sort();
32 | //
33 | // Now keys and values store the smallest K keys seen so far and the values
34 | // associated to these keys:
35 | //
36 | // for (int k = 0; k < K; ++k) {
37 | // float key_k = keys[k];
38 | // int value_k = values[k];
39 | // }
40 | template <typename key_t, typename value_t>
41 | class MinK {
42 | public:
43 | // Constructor.
44 | //
45 | // Arguments:
46 | // keys: Array in which to store keys
47 | // values: Array in which to store values
48 | // K: How many values to keep track of
49 | __device__ MinK(key_t* keys, value_t* vals, int K)
50 | : keys(keys), vals(vals), K(K), _size(0) {}
51 |
52 | // Try to add a new key and associated value to the data structure. If the key
53 | // is one of the smallest K seen so far then it will be kept; otherwise it
54 | // will not be kept.
55 | //
56 | // This takes O(1) operations if the new key is not kept, or if the structure
57 | // currently contains fewer than K elements. Otherwise this takes O(K) time.
58 | //
59 | // Arguments:
60 | // key: The key to add
61 | // val: The value associated to the key
62 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
63 | if (_size < K) {
64 | keys[_size] = key;
65 | vals[_size] = val;
66 | if (_size == 0 || key > max_key) {
67 | max_key = key;
68 | max_idx = _size;
69 | }
70 | _size++;
71 | } else if (key < max_key) {
72 | keys[max_idx] = key;
73 | vals[max_idx] = val;
74 | max_key = key;
75 | for (int k = 0; k < K; ++k) {
76 | key_t cur_key = keys[k];
77 | if (cur_key > max_key) {
78 | max_key = cur_key;
79 | max_idx = k;
80 | }
81 | }
82 | }
83 | }
84 |
85 | // Get the number of items currently stored in the structure.
86 | // This takes O(1) time.
87 | __device__ __forceinline__ int size() {
88 | return _size;
89 | }
90 |
91 | // Sort the items stored in the structure using bubble sort.
92 | // This takes O(K^2) time.
93 | __device__ __forceinline__ void sort() {
94 | for (int i = 0; i < _size - 1; ++i) {
95 | for (int j = 0; j < _size - i - 1; ++j) {
96 | if (keys[j + 1] < keys[j]) {
97 | key_t key = keys[j];
98 | value_t val = vals[j];
99 | keys[j] = keys[j + 1];
100 | vals[j] = vals[j + 1];
101 | keys[j + 1] = key;
102 | vals[j + 1] = val;
103 | }
104 | }
105 | }
106 | }
107 |
108 | private:
109 | key_t* keys;
110 | value_t* vals;
111 | int K;
112 | int _size;
113 | key_t max_key;
114 | int max_idx;
115 | };
116 |
117 | // This is a version of MinK that only touches the arrays using static indexing
118 | // via RegisterIndexUtils. If the keys and values are stored in thread-local
119 | // arrays, then this may allow the compiler to place them in registers for
120 | // fast access.
121 | //
122 | // This has the same API as MinK, but doesn't support sorting.
123 | // We found that sorting via RegisterIndexUtils gave very poor performance,
124 | // and suspect it may have prevented the compiler from placing the arrays
125 | // into registers.
126 | template <typename key_t, typename value_t, int K>
127 | class RegisterMinK {
128 | public:
129 | __device__ RegisterMinK(key_t* keys, value_t* vals)
130 | : keys(keys), vals(vals), _size(0) {}
131 |
132 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
133 | if (_size < K) {
134 | RegisterIndexUtils<key_t, K>::set(keys, _size, key);
135 | RegisterIndexUtils<value_t, K>::set(vals, _size, val);
136 | if (_size == 0 || key > max_key) {
137 | max_key = key;
138 | max_idx = _size;
139 | }
140 | _size++;
141 | } else if (key < max_key) {
142 | RegisterIndexUtils<key_t, K>::set(keys, max_idx, key);
143 | RegisterIndexUtils<value_t, K>::set(vals, max_idx, val);
144 | max_key = key;
145 | for (int k = 0; k < K; ++k) {
146 | key_t cur_key = RegisterIndexUtils<key_t, K>::get(keys, k);
147 | if (cur_key > max_key) {
148 | max_key = cur_key;
149 | max_idx = k;
150 | }
151 | }
152 | }
153 | }
154 |
155 | __device__ __forceinline__ int size() {
156 | return _size;
157 | }
158 |
159 | private:
160 | key_t* keys;
161 | value_t* vals;
162 | int _size;
163 | key_t max_key;
164 | int max_idx;
165 | };
166 |
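To see what MinK::add does without the CUDA details, here is a behaviorally equivalent Python sketch (illustrative only): keep the K smallest keys, rejecting keys that cannot enter the top K and replacing the current worst key otherwise. The device class caches max_key/max_idx so rejection is O(1); this sketch recomputes the maximum for brevity:

    class MinKSketch:
        def __init__(self, K):
            self.K, self.keys, self.vals = K, [], []

        def add(self, key, val):
            if len(self.keys) < self.K:
                self.keys.append(key); self.vals.append(val)
            elif key < max(self.keys):  # replace the current worst key
                i = self.keys.index(max(self.keys))
                self.keys[i], self.vals[i] = key, val

        def sorted_items(self):
            return sorted(zip(self.keys, self.vals))

    mk = MinKSketch(3)
    for k, v in [(5, "a"), (1, "b"), (4, "c"), (2, "d"), (9, "e")]:
        mk.add(k, v)
    assert [k for k, _ in mk.sorted_items()] == [1, 2, 4]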
--------------------------------------------------------------------------------
/third_parties/pytorch3d/cuda/utils/pytorch3d_cutils.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) Meta Platforms, Inc. and affiliates.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under the BSD-style license found in the
6 | * LICENSE file in the root directory of this source tree.
7 | */
8 |
9 | #pragma once
10 | #include
11 |
12 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.")
13 | #define CHECK_CONTIGUOUS(x) \
14 | TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.")
15 | #define CHECK_CONTIGUOUS_CUDA(x) \
16 | CHECK_CUDA(x); \
17 | CHECK_CONTIGUOUS(x)
18 |
--------------------------------------------------------------------------------