├── README.md
├── checkpoints
│   ├── init_deform_deform_cond_pe8.pth
│   └── script
│       ├── midpoint.mlx
│       ├── midpoint_head.mlx
│       ├── remesh.mlx
│       ├── remesh_bac.mlx
│       └── wt.mlx
├── configs
│   └── f3c.json
├── dataset
│   ├── __pycache__
│   │   ├── dataset.cpython-38.pyc
│   │   └── dataset_split.cpython-38.pyc
│   ├── dataset.py
│   └── dataset_split.py
├── deform
│   ├── __pycache__
│   │   └── smplx_exavatar_deformer.cpython-38.pyc
│   ├── smplx_exavatar
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── body_models.cpython-38.pyc
│   │   │   ├── lbs.cpython-38.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── vertex_ids.cpython-38.pyc
│   │   │   └── vertex_joint_selector.cpython-38.pyc
│   │   ├── body_models.py
│   │   ├── joint_names.py
│   │   ├── lbs.py
│   │   ├── utils.py
│   │   ├── vertex_ids.py
│   │   └── vertex_joint_selector.py
│   └── smplx_exavatar_deformer.py
├── denoiser
│   ├── __pycache__
│   │   └── denoiser.cpython-38.pyc
│   └── denoiser.py
├── figs
│   └── pipe.jpg
├── geometry
│   ├── __pycache__
│   │   ├── embedding.cpython-38.pyc
│   │   ├── gshell_tets.cpython-38.pyc
│   │   ├── hmsdf.cpython-38.pyc
│   │   ├── hmsdf_tets_split.cpython-38.pyc
│   │   └── mlp.cpython-38.pyc
│   ├── embedding.py
│   ├── gshell_tets.py
│   ├── hmsdf.py
│   ├── hmsdf_tets_split.py
│   └── mlp.py
├── lap_loss.py
├── render
│   ├── __pycache__
│   │   ├── light.cpython-38.pyc
│   │   ├── material.cpython-38.pyc
│   │   ├── mesh.cpython-38.pyc
│   │   ├── mlptexture.cpython-38.pyc
│   │   ├── obj.cpython-38.pyc
│   │   ├── regularizer.cpython-38.pyc
│   │   ├── render.cpython-38.pyc
│   │   ├── render_mask.cpython-38.pyc
│   │   ├── texture.cpython-38.pyc
│   │   └── util.cpython-38.pyc
│   ├── light.py
│   ├── material.py
│   ├── mesh.py
│   ├── mlptexture.py
│   ├── obj.py
│   ├── optixutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── build
│   │   │   ├── build.ninja
│   │   │   ├── denoising.cuda.o
│   │   │   ├── optix_wrapper.o
│   │   │   ├── optixutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── accessor.h
│   │   │   ├── bsdf.h
│   │   │   ├── common.h
│   │   │   ├── denoising.cu
│   │   │   ├── denoising.h
│   │   │   ├── envsampling
│   │   │   │   ├── kernel.cu
│   │   │   │   └── params.h
│   │   │   ├── math_utils.h
│   │   │   ├── optix_wrapper.cpp
│   │   │   ├── optix_wrapper.h
│   │   │   └── torch_bindings.cpp
│   │   ├── include
│   │   │   ├── internal
│   │   │   │   ├── optix_7_device_impl.h
│   │   │   │   ├── optix_7_device_impl_exception.h
│   │   │   │   └── optix_7_device_impl_transformations.h
│   │   │   ├── optix.h
│   │   │   ├── optix_7_device.h
│   │   │   ├── optix_7_host.h
│   │   │   ├── optix_7_types.h
│   │   │   ├── optix_denoiser_tiling.h
│   │   │   ├── optix_device.h
│   │   │   ├── optix_function_table.h
│   │   │   ├── optix_function_table_definition.h
│   │   │   ├── optix_host.h
│   │   │   ├── optix_stack_size.h
│   │   │   ├── optix_stubs.h
│   │   │   └── optix_types.h
│   │   ├── ops.py
│   │   └── tests
│   │       └── filter_test.py
│   ├── regularizer.py
│   ├── render.py
│   ├── render_mask.py
│   ├── renderutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── bsdf.cpython-38.pyc
│   │   │   ├── loss.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── bsdf.py
│   │   ├── build
│   │   │   ├── bsdf.cuda.o
│   │   │   ├── build.ninja
│   │   │   ├── common.o
│   │   │   ├── cubemap.cuda.o
│   │   │   ├── loss.cuda.o
│   │   │   ├── mesh.cuda.o
│   │   │   ├── normal.cuda.o
│   │   │   ├── renderutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── bsdf.cu
│   │   │   ├── bsdf.h
│   │   │   ├── common.cpp
│   │   │   ├── common.h
│   │   │   ├── cubemap.cu
│   │   │   ├── cubemap.h
│   │   │   ├── loss.cu
│   │   │   ├── loss.h
│   │   │   ├── mesh.cu
│   │   │   ├── mesh.h
│   │   │   ├── normal.cu
│   │   │   ├── normal.h
│   │   │   ├── tensor.h
│   │   │   ├── torch_bindings.cpp
│   │   │   ├── vec3f.h
│   │   │   └── vec4f.h
│   │   ├── loss.py
│   │   ├── ops.py
│   │   └── tests
│   │       ├── test_bsdf.py
│   │       ├── test_loss.py
│   │       ├── test_mesh.py
│   │       └── test_perf.py
│   ├── texture.py
│   └── util.py
├── script
│   ├── __pycache__
│   │   ├── connet_face_head.cpython-38.pyc
│   │   ├── get_tet_smpl.cpython-38.pyc
│   │   └── process_body_cloth_head_msdfcut.cpython-38.pyc
│   ├── connet_face_head.py
│   ├── get_tet_smpl.py
│   └── process_body_cloth_head_msdfcut.py
├── ssim_loss.py
├── third_parties
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── lpips
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-38.pyc
│   │   ├── lpips.py
│   │   ├── pretrained_networks.py
│   │   ├── trainer.py
│   │   └── weights
│   │       ├── v0.0
│   │       │   ├── alex.pth
│   │       │   ├── squeeze.pth
│   │       │   └── vgg.pth
│   │       └── v0.1
│   │           ├── alex.pth
│   │           ├── squeeze.pth
│   │           └── vgg.pth
│   └── pytorch3d
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   └── ops.cpython-38.pyc
│       ├── cuda
│       │   ├── knn.cpp
│       │   ├── knn.cu
│       │   ├── knn_cpu.cpp
│       │   └── utils
│       │       ├── dispatch.cuh
│       │       ├── index_utils.cuh
│       │       ├── mink.cuh
│       │       └── pytorch3d_cutils.h
│       └── ops.py
└── train.py

/README.md:
--------------------------------------------------------------------------------
# D3-Human: Dynamic Disentangled Digital Human from Monocular Video

PyTorch implementation of the paper "D3-Human: Dynamic Disentangled Digital Human from Monocular Video". This repository contains the reconstruction code and data.

**|[Project Page](https://ustc3dv.github.io/D3Human/)|** **|[Paper](https://arxiv.org/html/2501.01589v1)|**

This method reconstructs disentangled garment and body geometry from monocular videos.

## Pipeline
D3-Human takes a monocular video of a clothed human as input and reconstructs the garment and the body as disentangled geometry: the two are recovered as separate surfaces, so each can be deformed and processed independently.

![pipeline](figs/pipe.jpg)

## Setup

This code has been tested on a Tesla V100.

Environment:
* Ubuntu 20.04
* python 3.8.19

Run the following:
```
pip install ninja imageio PyOpenGL glfw xatlas gdown
pip install git+https://github.com/NVlabs/nvdiffrast/
pip install --global-option="--no-networks" git+https://github.com/NVlabs/tiny-cuda-nn#subdirectory=bindings/torch
```

Download the female SMPL-X model from https://smpl-x.is.tue.mpg.de/ and place it in the `./smplx` folder.

Download the preprocessed data from [here](https://drive.google.com/drive/folders/1-OY5X7pnt45XBMURVTM55xhOrKKUi7BX?usp=sharing) and place it in the `./data` folder.

## Reconstruction

Run the following command to reconstruct:

```
CUDA_VISIBLE_DEVICES=0 python train.py -o res/f3c --folder_name female-3-casual --config configs/f3c.json
```

## Dataset Preparation
If you wish to reconstruct your own monocular video, you can use [ExAvatar](https://github.com/mks0601/ExAvatar_RELEASE) to obtain SMPL-X coefficients and camera parameters, [Sapiens](https://github.com/facebookresearch/sapiens) to obtain normals, and [SAM2](https://github.com/facebookresearch/sam2) to obtain masks for the garments, the body, and the fully clothed human.

## Notes
If MeshLab cannot be executed from the command line, you can manually perform the remeshing and watertight processing (the `.mlx` scripts in `checkpoints/script`) within the MeshLab GUI.
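For reference, command-line processing with the bundled `.mlx` scripts typically looks like the following (a sketch assuming `meshlabserver` is installed and on your `PATH`; the mesh file names are placeholders):

```
meshlabserver -i mesh.obj -o mesh_remeshed.obj -s checkpoints/script/remesh.mlx
meshlabserver -i mesh_remeshed.obj -o mesh_wt.obj -s checkpoints/script/wt.mlx
```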
## Citation

If you find our paper useful for your work, please cite:

```
@inproceedings{Chen2024D3human,
  author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
  title     = {D$^3$-Human: Dynamic Disentangled Digital Human from Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2025}
}
```

## Contact
For further questions, please contact honghuc@mail.ustc.edu.cn

## Acknowledgement

Our data is processed with the help of [G-Shell](https://github.com/lzzcd001/GShell):
```
@inproceedings{Liu2024gshell,
  title={Ghost on the Shell: An Expressive Representation of General 3D Shapes},
  author={Liu, Zhen and Feng, Yao and Xiu, Yuliang and Liu, Weiyang
          and Paull, Liam and Black, Michael J and Sch{\"o}lkopf, Bernhard},
  booktitle={ICLR},
  year={2024}
}
```
--------------------------------------------------------------------------------
/checkpoints/init_deform_deform_cond_pe8.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/checkpoints/init_deform_deform_cond_pe8.pth
--------------------------------------------------------------------------------
/checkpoints/script/midpoint.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/midpoint_head.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/remesh.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/remesh_bac.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/wt.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/configs/f3c.json:
--------------------------------------------------------------------------------
{
    "ref_mesh": "data/spot/spot.obj",
    "random_textures": true,
    "iter": 10000,
    "save_interval": 100,
    "save_interval_fine": 100,
    "texture_res": [ 1080, 1080 ],
    "train_res": [1080, 1080],
    "batch": 1,
    "learning_rate": [0.03, 0.005],
    "ks_min" : [0, 0.001, 0.0],
    "ks_max" : [0, 1.0, 1.0],
    "lock_pos" : false,
    "display": [{"latlong" : true}],
    "background" : "white",
    "denoiser": "bilateral",
    "n_samples" : 24,
    "env_scale" : 2.0,
    "gshell_grid" : 128,
    "validate" : true,
    "laplace_scale" : 6000,
    "boxscale": [1, 1, 1],
    "aabb": [-1, -1, -1, 1, 1, 1]
}
--------------------------------------------------------------------------------
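A minimal sketch of how a flat JSON config like `configs/f3c.json` above can be consumed (the actual parsing lives in `train.py` and may differ; the `defaults` dict here is illustrative):

```
import json

def load_flags(path, defaults=None):
    """Read a flat JSON config such as configs/f3c.json and overlay it on defaults."""
    flags = dict(defaults or {})
    with open(path) as f:
        flags.update(json.load(f))
    return flags

flags = load_flags("configs/f3c.json", defaults={"batch": 1, "validate": False})
print(flags["train_res"], flags["gshell_grid"])  # [1080, 1080] 128
```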
/dataset/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/dataset_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset_split.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from .body_models import (
    create,
    SMPL,
    SMPLH,
    SMPLX,
    MANO,
    FLAME,
    build_layer,
    SMPLLayer,
    SMPLHLayer,
    SMPLXLayer,
    MANOLayer,
    FLAMELayer,
)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/joint_names.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

import numpy as np

JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist", "jaw",
    "left_eye_smplhf", "right_eye_smplhf",
    "left_index1", "left_index2", "left_index3",
    "left_middle1", "left_middle2", "left_middle3",
    "left_pinky1", "left_pinky2", "left_pinky3",
    "left_ring1", "left_ring2", "left_ring3",
    "left_thumb1", "left_thumb2", "left_thumb3",
    "right_index1", "right_index2", "right_index3",
    "right_middle1", "right_middle2", "right_middle3",
    "right_pinky1", "right_pinky2", "right_pinky3",
    "right_ring1", "right_ring2", "right_ring3",
    "right_thumb1", "right_thumb2", "right_thumb3",
    "nose", "right_eye", "left_eye", "right_ear", "left_ear",
    "left_big_toe", "left_small_toe", "left_heel",
    "right_big_toe", "right_small_toe", "right_heel",
    "left_thumb", "left_index", "left_middle", "left_ring", "left_pinky",
    "right_thumb", "right_index", "right_middle", "right_ring", "right_pinky",
    "right_eye_brow1", "right_eye_brow2", "right_eye_brow3", "right_eye_brow4", "right_eye_brow5",
    "left_eye_brow5", "left_eye_brow4", "left_eye_brow3", "left_eye_brow2", "left_eye_brow1",
    "nose1", "nose2", "nose3", "nose4",
    "right_nose_2", "right_nose_1", "nose_middle", "left_nose_1", "left_nose_2",
    "right_eye1", "right_eye2", "right_eye3", "right_eye4", "right_eye5", "right_eye6",
    "left_eye4", "left_eye3", "left_eye2", "left_eye1", "left_eye6", "left_eye5",
    "right_mouth_1", "right_mouth_2", "right_mouth_3", "mouth_top",
    "left_mouth_3", "left_mouth_2", "left_mouth_1",
    "left_mouth_5",  # 59 in OpenPose output
    "left_mouth_4",  # 58 in OpenPose output
    "mouth_bottom", "right_mouth_4", "right_mouth_5",
    "right_lip_1", "right_lip_2", "lip_top", "left_lip_2", "left_lip_1",
    "left_lip_3", "lip_bottom", "right_lip_3",
    # Face contour
    "right_contour_1", "right_contour_2", "right_contour_3", "right_contour_4",
    "right_contour_5", "right_contour_6", "right_contour_7", "right_contour_8",
    "contour_middle",
    "left_contour_8", "left_contour_7", "left_contour_6", "left_contour_5",
    "left_contour_4", "left_contour_3", "left_contour_2", "left_contour_1",
]


SMPLH_JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist",
    "left_index1", "left_index2", "left_index3",
    "left_middle1", "left_middle2", "left_middle3",
    "left_pinky1", "left_pinky2", "left_pinky3",
    "left_ring1", "left_ring2", "left_ring3",
    "left_thumb1", "left_thumb2", "left_thumb3",
    "right_index1", "right_index2", "right_index3",
    "right_middle1", "right_middle2", "right_middle3",
    "right_pinky1", "right_pinky2", "right_pinky3",
    "right_ring1", "right_ring2", "right_ring3",
    "right_thumb1", "right_thumb2", "right_thumb3",
    "nose", "right_eye", "left_eye", "right_ear", "left_ear",
    "left_big_toe", "left_small_toe", "left_heel",
    "right_big_toe", "right_small_toe", "right_heel",
    "left_thumb", "left_index", "left_middle", "left_ring", "left_pinky",
    "right_thumb", "right_index", "right_middle", "right_ring", "right_pinky",
]

SMPL_JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hand", "right_hand",
]


class Body:
    """
    Class for storing a single body pose.
    """

    def __init__(self, joints, joint_names):
        assert joints.ndim > 1
        assert joints.shape[0] == len(joint_names)
        self.joints = {}
        for i, j in enumerate(joint_names):
            self.joints[j] = joints[i]

    @staticmethod
    def from_smpl(joints):
        """
        Create a Body object from SMPL joints.
        """
        return Body(joints, SMPL_JOINT_NAMES)

    @staticmethod
    def from_smplh(joints):
        """
        Create a Body object from SMPLH joints.
        """
        return Body(joints, SMPLH_JOINT_NAMES)

    def _as(self, joint_names):
        """
        Return the joints stacked in the order given by joint_names;
        missing joints are filled with zeros.
        """
        joint_list = []
        for j in joint_names:
            if j not in self.joints:
                joint_list.append(np.zeros_like(self.joints["spine1"]))
            else:
                joint_list.append(self.joints[j])
        return np.stack(joint_list, axis=0)

    def as_smpl(self):
        """
        Convert the body to SMPL joints.
        """
        return self._as(SMPL_JOINT_NAMES)

    def as_smplh(self):
        """
        Convert the body to SMPLH joints.
        """
        return self._as(SMPLH_JOINT_NAMES)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from typing import NewType, Union, Optional
from dataclasses import dataclass, asdict, fields
import numpy as np
import torch

Tensor = NewType('Tensor', torch.Tensor)
Array = NewType('Array', np.ndarray)


@dataclass
class ModelOutput:
    vertices: Optional[Tensor] = None
    joints: Optional[Tensor] = None
    full_pose: Optional[Tensor] = None
    global_orient: Optional[Tensor] = None
    transl: Optional[Tensor] = None
    v_shaped: Optional[Tensor] = None

    def __getitem__(self, key):
        return getattr(self, key)

    def get(self, key, default=None):
        return getattr(self, key, default)

    def __iter__(self):
        return self.keys()

    def keys(self):
        keys = [t.name for t in fields(self)]
        return iter(keys)

    def values(self):
        values = [getattr(self, t.name) for t in fields(self)]
        return iter(values)

    def items(self):
        data = [(t.name, getattr(self, t.name)) for t in fields(self)]
        return iter(data)


@dataclass
class SMPLOutput(ModelOutput):
    betas: Optional[Tensor] = None
    body_pose: Optional[Tensor] = None


@dataclass
class SMPLHOutput(SMPLOutput):
    left_hand_pose: Optional[Tensor] = None
    right_hand_pose: Optional[Tensor] = None
    transl: Optional[Tensor] = None


@dataclass
class SMPLXOutput(SMPLHOutput):
    expression: Optional[Tensor] = None
    jaw_pose: Optional[Tensor] = None


@dataclass
class MANOOutput(ModelOutput):
    betas: Optional[Tensor] = None
    hand_pose: Optional[Tensor] = None


@dataclass
class FLAMEOutput(ModelOutput):
    betas: Optional[Tensor] = None
    expression: Optional[Tensor] = None
    jaw_pose: Optional[Tensor] = None
    neck_pose: Optional[Tensor] = None


def find_joint_kin_chain(joint_id, kinematic_tree):
    kin_chain = []
    curr_idx = joint_id
    while curr_idx != -1:
        kin_chain.append(curr_idx)
        curr_idx = kinematic_tree[curr_idx]
    return kin_chain


def to_tensor(
    array: Union[Array, Tensor], dtype=torch.float32
) -> Tensor:
    if torch.is_tensor(array):
        return array
    else:
        return torch.tensor(array, dtype=dtype)


class Struct(object):
    def __init__(self, **kwargs):
        for key, val in kwargs.items():
            setattr(self, key, val)


def to_np(array, dtype=np.float32):
    if 'scipy.sparse' in str(type(array)):
        array = array.todense()
    return np.array(array, dtype=dtype)


def rot_mat_to_euler(rot_mats):
    # Convert rotation matrices to Euler angles.
    # Careful for extreme cases of Euler angles like [0.0, pi, 0.0]

    sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] +
                    rot_mats[:, 1, 0] * rot_mats[:, 1, 0])
    return torch.atan2(-rot_mats[:, 2, 0], sy)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/vertex_ids.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

# Joint name to vertex mapping. SMPL/SMPL-H/SMPL-X vertices that correspond to
# MSCOCO and OpenPose joints
vertex_ids = {
    'smplh': {
        'nose':      332,
        'reye':      6260,
        'leye':      2800,
        'rear':      4071,
        'lear':      583,
        'rthumb':    6191,
        'rindex':    5782,
        'rmiddle':   5905,
        'rring':     6016,
        'rpinky':    6133,
        'lthumb':    2746,
        'lindex':    2319,
        'lmiddle':   2445,
        'lring':     2556,
        'lpinky':    2673,
        'LBigToe':   3216,
        'LSmallToe': 3226,
        'LHeel':     3387,
        'RBigToe':   6617,
        'RSmallToe': 6624,
        'RHeel':     6787
    },
    'smplx': {
        'nose':      9120,
        'reye':      9929,
        'leye':      9448,
        'rear':      616,
        'lear':      6,
        'rthumb':    8079,
        'rindex':    7669,
        'rmiddle':   7794,
        'rring':     7905,
        'rpinky':    8022,
        'lthumb':    5361,
        'lindex':    4933,
        'lmiddle':   5058,
        'lring':     5169,
        'lpinky':    5286,
        'LBigToe':   5770,
        'LSmallToe': 5780,
        'LHeel':     8846,
        'RBigToe':   8463,
        'RSmallToe': 8474,
        'RHeel':     8635
    },
    'mano': {
        'thumb':  744,
        'index':  320,
        'middle': 443,
        'ring':   554,
        'pinky':  671,
    }
}
--------------------------------------------------------------------------------
/deform/smplx_exavatar/vertex_joint_selector.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np

import torch
import torch.nn as nn

from .utils import to_tensor


class VertexJointSelector(nn.Module):

    def __init__(self, vertex_ids=None,
                 use_hands=True,
                 use_feet_keypoints=True, **kwargs):
        super(VertexJointSelector, self).__init__()

        extra_joints_idxs = []

        face_keyp_idxs = np.array([
            vertex_ids['nose'],
            vertex_ids['reye'],
            vertex_ids['leye'],
            vertex_ids['rear'],
            vertex_ids['lear']], dtype=np.int64)

        extra_joints_idxs = np.concatenate([extra_joints_idxs,
                                            face_keyp_idxs])

        if use_feet_keypoints:
            feet_keyp_idxs = np.array([vertex_ids['LBigToe'],
                                       vertex_ids['LSmallToe'],
                                       vertex_ids['LHeel'],
                                       vertex_ids['RBigToe'],
                                       vertex_ids['RSmallToe'],
                                       vertex_ids['RHeel']], dtype=np.int32)

            extra_joints_idxs = np.concatenate(
                [extra_joints_idxs, feet_keyp_idxs])

        if use_hands:
            self.tip_names = ['thumb', 'index', 'middle', 'ring', 'pinky']

            tips_idxs = []
            for hand_id in ['l', 'r']:
                for tip_name in self.tip_names:
                    tips_idxs.append(vertex_ids[hand_id + tip_name])

            extra_joints_idxs = np.concatenate(
                [extra_joints_idxs, tips_idxs])

        self.register_buffer('extra_joints_idxs',
                             to_tensor(extra_joints_idxs, dtype=torch.long))

    def forward(self, vertices, joints):
        # The '.to(torch.long)' is added to make the trace work in C++;
        # otherwise you get a runtime error in C++:
        # 'index_select(): Expected dtype int32 or int64 for index'
        extra_joints = torch.index_select(vertices, 1, self.extra_joints_idxs.to(torch.long))
        joints = torch.cat([joints, extra_joints], dim=1)

        return joints
--------------------------------------------------------------------------------
/denoiser/__pycache__/denoiser.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/denoiser/__pycache__/denoiser.cpython-38.pyc
--------------------------------------------------------------------------------
/denoiser/denoiser.py:
--------------------------------------------------------------------------------
import os

import torch
import numpy as np
import math

from render import util
if "TWOSIDED_TEXTURE" not in os.environ or os.environ["TWOSIDED_TEXTURE"] == "True":
    from render import optixutils as ou
else:
    from render import optixutils_single_sided as ou


###############################################################################
# Bilateral denoiser
#
# Loosely based on SVGF, but removing temporal components and variance stopping guides.
# https://research.nvidia.com/publication/2017-07_spatiotemporal-variance-guided-filtering-real-time-reconstruction-path-traced
###############################################################################

class BilateralDenoiser(torch.nn.Module):
    def __init__(self, influence=1.0):
        super(BilateralDenoiser, self).__init__()
        self.set_influence(influence)

    def set_influence(self, factor):
        self.sigma = max(factor * 2, 0.0001)
        self.variance = self.sigma**2.
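        # A half-width of 2.5*sigma captures ~99% of the Gaussian mass;
        # N below is the corresponding odd filter footprint in pixels.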
        self.N = 2 * math.ceil(self.sigma * 2.5) + 1

    def forward(self, input):
        col = input[..., 0:3]
        nrm = util.safe_normalize(input[..., 3:6])  # Bent normals can produce normals of length < 1 here
        zdz = input[..., 6:8]
        return ou.bilateral_denoiser(col, nrm, zdz, self.sigma)
--------------------------------------------------------------------------------
/figs/pipe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/figs/pipe.jpg
--------------------------------------------------------------------------------
/geometry/__pycache__/embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/embedding.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/gshell_tets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/gshell_tets.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/mlp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/mlp.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/embedding.py:
--------------------------------------------------------------------------------
import torch
from torch import nn

class Embedding(nn.Module):
    def __init__(self, in_channels, N_freqs, logscale=True):
        """
        Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
        in_channels: number of input channels (3 for both xyz and direction)
        """
        super(Embedding, self).__init__()
        self.N_freqs = N_freqs
        self.in_channels = in_channels
        self.funcs = [torch.sin, torch.cos]
        self.out_channels = in_channels*(len(self.funcs)*N_freqs+1)

        if logscale:
            self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs)
        else:
            self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs)

    def forward(self, x):
        """
        Embeds x to (x, sin(2^k x), cos(2^k x), ...)
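        (e.g. in_channels=3 with N_freqs=8 yields 3 * (2*8 + 1) = 51 output channels)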
        Different from the paper, "x" is also in the output
        See https://github.com/bmild/nerf/issues/12

        Inputs:
            x: (B, self.in_channels)

        Outputs:
            out: (B, self.out_channels)
        """
        out = [x]
        for freq in self.freq_bands:
            for func in self.funcs:
                out += [func(freq*x)]

        return torch.cat(out, -1)
--------------------------------------------------------------------------------
/lap_loss.py:
--------------------------------------------------------------------------------
import torch


def compute_body_laplacian(self):
    edges_packed = self._body_edges.clone().detach()
    body_verts = self.get_xyz[:self.body_verts_num]
    V = body_verts.shape[0]

    e0, e1 = edges_packed.unbind(1)

    idx01 = torch.stack([e0, e1], dim=1)  # (sum(E_n), 2)
    idx10 = torch.stack([e1, e0], dim=1)  # (sum(E_n), 2)
    idx = torch.cat([idx01, idx10], dim=0).t()  # (2, 2*sum(E_n))

    # First, we construct the adjacency matrix,
    # i.e. A[i, j] = 1 if (i,j) is an edge, or
    # A[e0, e1] = 1 & A[e1, e0] = 1
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
    A = torch.sparse.FloatTensor(idx, ones, (V, V))

    # the sum of the i-th row of A gives the degree of the i-th vertex
    deg = torch.sparse.sum(A, dim=1).to_dense()

    # We construct the Laplacian matrix by adding the non diagonal values
    # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge
    deg0 = deg[e0]
    deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0)
    deg1 = deg[e1]
    deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1)
    val = torch.cat([deg0, deg1])
    L = torch.sparse.FloatTensor(idx, val, (V, V))

    # Then we add the diagonal values L[i, i] = -1.
    idx = torch.arange(V, device=self._xyz.device)
    idx = torch.stack([idx, idx], dim=0)
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
    L -= torch.sparse.FloatTensor(idx, ones, (V, V))
    self.body_laplacian = L

def body_laplacian_loss(mesh):
    L = mesh.laplacian
    V = mesh.v_pos

    loss = L.mm(V)
    loss = loss.norm(dim=1)**2
    return loss.mean()


def body_normal_loss(mesh):
    # loss = 1 - torch.cosine_similarity(mesh.face_normals[mesh.connected_faces[:, 0]], mesh.face_normals[mesh.connected_faces[:, 1]], dim=1)
    return mesh.normal_consistency()

def find_edges(indices, remove_duplicates=True):
    # Extract the three edges (in terms of vertex indices) for each face
    # edges_0 = [f0_e0, ..., fN_e0]
    # edges_1 = [f0_e1, ..., fN_e1]
    # edges_2 = [f0_e2, ..., fN_e2]
    edges_0 = torch.index_select(indices, 1, torch.tensor([0,1], device=indices.device))
    edges_1 = torch.index_select(indices, 1, torch.tensor([1,2], device=indices.device))
    edges_2 = torch.index_select(indices, 1, torch.tensor([2,0], device=indices.device))

    # Merge them into one tensor so that the three edges of one face appear sequentially
    # edges = [f0_e0, f0_e1, f0_e2, ..., fN_e0, fN_e1, fN_e2]
    edges = torch.cat([edges_0, edges_1, edges_2], dim=1).view(indices.shape[0] * 3, -1)

    if remove_duplicates:
        edges, _ = torch.sort(edges, dim=1)
        edges = torch.unique(edges, dim=0)

    return edges

def find_connected_faces(indices):
    edges = find_edges(indices, remove_duplicates=False)

    # Make sure that two edges that share the same vertices have the vertex ids appear in the same order
    edges, _ = torch.sort(edges, dim=1)

    # Now find edges that share the same vertices and make sure there are only manifold edges
    _, inverse_indices, counts = torch.unique(edges, dim=0, sorted=False, return_inverse=True, return_counts=True)

    # print("counts.max():", counts.max())
    assert counts.max() == 2

    # We now create a tensor that contains corresponding faces.
    # If the faces with ids fi and fj share the same edge, the tensor contains them as
    # [..., [fi, fj], ...]
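    # Note: the loop below runs on the CPU and visits every edge exactly once;
    # each manifold edge (counts == 2) ends up with the two face ids that share it.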
    face_ids = torch.arange(indices.shape[0])
    face_ids = torch.repeat_interleave(face_ids, 3, dim=0)  # Tensor with the face id for each edge

    face_correspondences = torch.zeros((counts.shape[0], 2), dtype=torch.int64)
    face_correspondences_indices = torch.zeros(counts.shape[0], dtype=torch.int64)

    # ei = edge index
    for ei, ei_unique in enumerate(list(inverse_indices.cpu().numpy())):
        face_correspondences[ei_unique, face_correspondences_indices[ei_unique]] = face_ids[ei]
        face_correspondences_indices[ei_unique] += 1

    face_correspondences = face_correspondences.cuda()

    return face_correspondences[counts == 2].to(device=indices.device), edges
--------------------------------------------------------------------------------
/render/__pycache__/light.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/light.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/material.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/material.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mesh.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mesh.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mlptexture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mlptexture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/obj.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/obj.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/regularizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/regularizer.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render_mask.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render_mask.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/texture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/texture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/render/light.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import os
import numpy as np
import torch
import nvdiffrast.torch as dr

from . import util
from . import renderutils as ru

######################################################################################
# Monte-carlo sampled environment light with PDF / CDF computation
######################################################################################

class EnvironmentLight:
    LIGHT_MIN_RES = 16

    MIN_ROUGHNESS = 0.08
    MAX_ROUGHNESS = 0.5

    def __init__(self, base):
        self.mtx = None
        self.base = base

        self.pdf_scale = (self.base.shape[0] * self.base.shape[1]) / (2 * np.pi * np.pi)
        self.update_pdf()

    def xfm(self, mtx):
        self.mtx = mtx

    def parameters(self):
        return [self.base]

    def clone(self):
        return EnvironmentLight(self.base.clone().detach())

    def clamp_(self, min=None, max=None):
        self.base.clamp_(min, max)

    def update_pdf(self):
        with torch.no_grad():
            # Compute PDF
            Y = util.pixel_grid(self.base.shape[1], self.base.shape[0])[..., 1]
            self._pdf = torch.max(self.base, dim=-1)[0] * torch.sin(Y * np.pi)  # Scale by sin(theta) for lat-long, https://cs184.eecs.berkeley.edu/sp18/article/25
            self._pdf = self._pdf / torch.sum(self._pdf)

            # Compute cumulative sums over the columns and rows
            self.cols = torch.cumsum(self._pdf, dim=1)
            self.rows = torch.cumsum(self.cols[:, -1:].repeat([1, self.cols.shape[1]]), dim=0)

            # Normalize
            self.cols = self.cols / torch.where(self.cols[:, -1:] > 0, self.cols[:, -1:], torch.ones_like(self.cols))
            self.rows = self.rows / torch.where(self.rows[-1:, :] > 0, self.rows[-1:, :], torch.ones_like(self.rows))

    @torch.no_grad()
    def generate_image(self, res):
        texcoord = util.pixel_grid(res[1], res[0])
        return dr.texture(self.base[None, ...].contiguous(), texcoord[None, ...].contiguous(), filter_mode='linear')[0]

######################################################################################
# Load and store
######################################################################################

@torch.no_grad()
def _load_env_hdr(fn, scale=1.0, res=None, trainable=False):
    latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')*scale

    if res is not None:
        texcoord = util.pixel_grid(res[1], res[0])
        latlong_img = torch.clamp(dr.texture(latlong_img[None, ...], texcoord[None, ...], filter_mode='linear')[0], min=0.0001)

    print("EnvProbe,", latlong_img.shape, ", min/max", torch.min(latlong_img).item(), torch.max(latlong_img).item())
    if trainable:
        print("trainable light loaded")
        return EnvironmentLight(base=latlong_img.clone().detach().requires_grad_(True))
    else:
        return EnvironmentLight(base=latlong_img)

@torch.no_grad()
def load_env(fn, scale=1.0, res=None, trainable=False):
    if os.path.splitext(fn)[1].lower() == ".hdr":
        return _load_env_hdr(fn, scale, res, trainable=trainable)
    else:
        assert False, "Unknown envlight extension %s" % os.path.splitext(fn)[1]

@torch.no_grad()
def save_env_map(fn, light):
    assert isinstance(light, EnvironmentLight)
    color = light.generate_image([512, 1024])
    util.save_image_raw(fn, color.detach().cpu().numpy())

######################################################################################
# Create trainable with random initialization
######################################################################################

def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
    base = torch.rand(base_res, base_res, 3, dtype=torch.float32, device='cuda') * scale + bias
    l = EnvironmentLight(base.clone().detach().requires_grad_(True))
    return l
--------------------------------------------------------------------------------
/render/material.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import os
import numpy as np
import torch

from . import util
from . import texture
from . import mlptexture

######################################################################################
# .mtl material format loading / storing
######################################################################################

def load_mtl(fn, clear_ks=True):
    import re
    mtl_path = os.path.dirname(fn)

    # Read file
    with open(fn, 'r') as f:
        lines = f.readlines()

    # Parse materials
    materials = []
    for line in lines:
        split_line = re.split(' +|\t+|\n+', line.strip())
        prefix = split_line[0].lower()
        data = split_line[1:]
        if 'newmtl' in prefix:
            material = {'name' : data[0]}
            materials += [material]
        elif materials:
            if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
                material[prefix] = data[0]
            else:
                material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')

    # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps, so replace constants with 1x1 maps
    for mat in materials:
        if not 'bsdf' in mat:
            mat['bsdf'] = 'pbr'

        if 'map_kd' in mat:
            mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
        else:
            mat['kd'] = texture.Texture2D(mat['kd'])

        if 'map_ks' in mat:
            mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
        else:
            mat['ks'] = texture.Texture2D(mat['ks'])

        if 'bump' in mat:
            mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)

        # Convert Kd from sRGB to linear RGB
        mat['kd'] = texture.srgb_to_rgb(mat['kd'])

        if clear_ks:
            # Override ORM occlusion (red) channel by zeros. We hijack this channel
            for mip in mat['ks'].getMips():
                mip[..., 0] = 0.0

    return materials

def save_mtl(fn, material):
    folder = os.path.dirname(fn)
    with open(fn, "w") as f:
        f.write('newmtl defaultMat\n')
        if material is not None:
            f.write('bsdf %s\n' % material['bsdf'])
            if 'kd' in material.keys():
                f.write('map_Kd texture_kd.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
            if 'ks' in material.keys():
                f.write('map_Ks texture_ks.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
            if 'normal' in material.keys():
                f.write('bump texture_n.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(util.safe_normalize(x)+1)*0.5)
        else:
            f.write('Kd 1 1 1\n')
            f.write('Ks 0 0 0\n')
            f.write('Ka 0 0 0\n')
            f.write('Tf 1 1 1\n')
            f.write('Ni 1\n')
            f.write('Ns 0\n')

######################################################################################
# Utility function to convert an existing material and make all textures trainable
######################################################################################

def create_trainable(material):
    result = material.copy()
    for key, val in result.items():
        if isinstance(val, texture.Texture2D):
            result[key] = texture.create_trainable(val)
    return result

def get_parameters(material):
    trainable = []
    for key, val in material.items():
        if isinstance(val, texture.Texture2D) or isinstance(val, mlptexture.MLPTexture3D):
            trainable += val.parameters()
    return trainable

######################################################################################
# Merge multiple materials into a single uber-material
######################################################################################

def _upscale_replicate(x, full_res):
    x = x.permute(0, 3, 1, 2)
    x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
    return x.permute(0, 2, 3, 1).contiguous()

def merge_materials(materials, texcoords, tfaces, mfaces):
    assert len(materials) > 0
    for mat in materials:
        assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
        assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"

    uber_material = {
        'name' : 'uber_material',
        'bsdf' : materials[0]['bsdf'],
    }

    textures = ['kd', 'ks', 'normal']

    # Find maximum texture resolution across all materials and textures
    max_res = None
    for mat in materials:
        for tex in textures:
            tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
            max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res

    # Compute size of compound texture and round up to nearest PoT
    full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)  # np.int was removed from recent NumPy; the builtin int is equivalent

    # Normalize texture resolution across all materials & combine into a single large texture
    for tex in textures:
        if tex in materials[0]:
            tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2)  # Lay out all textures horizontally, NHWC so dim2 is x
            tex_data = _upscale_replicate(tex_data, full_res)
            uber_material[tex] = texture.Texture2D(tex_data)

    # Compute scaling values for used / unused texture area
    s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]

    # Recompute texture coordinates to coincide with the new composite texture
    new_tverts = {}
    new_tverts_data = []
    for fi in range(len(tfaces)):
        matIdx = mfaces[fi]
        for vi in range(3):
            ti = tfaces[fi][vi]
            if not (ti in new_tverts):
                new_tverts[ti] = {}
            if not (matIdx in new_tverts[ti]):  # create new vertex
                new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]])  # Offset texture coordinate (x direction) by material id & scale to local space. Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here
                new_tverts[ti][matIdx] = len(new_tverts_data) - 1
            tfaces[fi][vi] = new_tverts[ti][matIdx]  # reindex vertex

    return uber_material, new_tverts_data, tfaces
--------------------------------------------------------------------------------
/render/mlptexture.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
9 | 10 | import torch 11 | import tinycudann as tcnn 12 | import numpy as np 13 | 14 | ####################################################################################################################################################### 15 | # Small MLP using PyTorch primitives, internal helper class 16 | ####################################################################################################################################################### 17 | 18 | class _MLP(torch.nn.Module): 19 | def __init__(self, cfg, loss_scale=1.0): 20 | super(_MLP, self).__init__() 21 | self.loss_scale = loss_scale 22 | net = (torch.nn.Linear(cfg['n_input_dims'], cfg['n_neurons'], bias=False), torch.nn.ReLU()) 23 | for i in range(cfg['n_hidden_layers']-1): 24 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_neurons'], bias=False), torch.nn.ReLU()) 25 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_output_dims'], bias=False),) 26 | self.net = torch.nn.Sequential(*net).cuda() 27 | 28 | self.net.apply(self._init_weights) 29 | 30 | if self.loss_scale != 1.0: 31 | self.net.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] * self.loss_scale, )) 32 | 33 | def forward(self, x): 34 | return self.net(x.to(torch.float32)) 35 | 36 | @staticmethod 37 | def _init_weights(m): 38 | if type(m) == torch.nn.Linear: 39 | torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu') 40 | if hasattr(m.bias, 'data'): 41 | m.bias.data.fill_(0.0) 42 | 43 | ############################################ 44 | 45 | 46 | 47 | ####################################################################################################################################################### 48 | # Outward visible MLP class 49 | ####################################################################################################################################################### 50 | 51 | class MLPTexture3D(torch.nn.Module): 52 | def __init__(self, AABB, channels = 3, internal_dims = 32, hidden = 2, min_max = None, use_float16=False): 53 | super(MLPTexture3D, self).__init__() 54 | 55 | self.channels = channels 56 | self.internal_dims = internal_dims 57 | self.AABB = AABB 58 | self.min_max = min_max 59 | self.use_float16 = use_float16 60 | 61 | # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details 62 | desired_resolution = 4096 63 | base_grid_resolution = 16 64 | num_levels = 16 65 | per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1)) 66 | 67 | 68 | enc_cfg = { 69 | "otype": "HashGrid", 70 | "n_levels": 5, # 16 71 | "n_features_per_level": 2, #2 72 | "log2_hashmap_size": 21, # 21 73 | "base_resolution": base_grid_resolution, # 16 74 | "per_level_scale" : per_level_scale # 1.4472692374403782 75 | } 76 | 77 | 78 | gradient_scaling = 128.0 79 | self.encoder = tcnn.Encoding(3, enc_cfg) 80 | 81 | # Setup MLP 82 | mlp_cfg = { 83 | "n_input_dims" : self.encoder.n_output_dims, 84 | "n_output_dims" : self.channels, 85 | "n_hidden_layers" : hidden, 86 | "n_neurons" : self.internal_dims 87 | } 88 | self.net = _MLP(mlp_cfg, gradient_scaling) 89 | print("Encoder output: %d dims" % (self.encoder.n_output_dims)) 90 | 91 | def sample(self, texc, frame_id): 92 | 93 | ################################### 94 | bbox = torch.tensor([0.6, 0.6, 0.2]).cuda(), torch.tensor([-0.8, -1.2, -0.2]).cuda() 95 | _texc = (texc.view(-1, 3) - bbox[0][None, ...]) / (bbox[1][None, ...] 
- bbox[0][None, ...]) 96 | _texc = torch.clamp(_texc, min=0, max=1) 97 | 98 | p_enc = self.encoder(_texc.contiguous()) 99 | 100 | with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16): 101 | out = self.net.forward(p_enc) 102 | 103 | # Sigmoid limit and scale to the allowed range 104 | out = torch.sigmoid(out) * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :] 105 | 106 | 107 | return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, c] 108 | 109 | 110 | # In-place clamp with no derivative; a no-op here, since sample() already bounds the output to [min_max[0], min_max[1]] via the sigmoid mapping 111 | def clamp_(self): 112 | pass 113 | 114 | def cleanup(self): 115 | tcnn.free_temporary_memory() 116 | 117 | 118 | 119 | class MeshTexture3D(torch.nn.Module): # incomplete stub: only wires kd/ks ranges from FLAGS into an MLPTexture3D 120 | def __init__(self, v): 121 | pass 122 | 123 | def __get_load_Texture3d(self, mesh, FLAGS): 124 | 125 | kd_min, kd_max = torch.tensor(FLAGS.kd_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.kd_max, dtype=torch.float32, device='cuda') 126 | ks_min, ks_max = torch.tensor(FLAGS.ks_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.ks_max, dtype=torch.float32, device='cuda') 127 | 128 | mlp_min = torch.cat((kd_min[0:3], ks_min), dim=0) 129 | mlp_max = torch.cat((kd_max[0:3], ks_max), dim=0) 130 | 131 | self.mlp_material = MLPTexture3D(mesh.getAABB(), channels=6, min_max=[mlp_min, mlp_max], use_float16=FLAGS.use_float16) 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /render/optixutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
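# The package exposes four entry points (all defined in ops.py): OptiXContext wraps the
# compiled plugin state, optix_build_bvh (re)builds the acceleration structure over a
# triangle mesh, and optix_env_shade / bilateral_denoiser are the autograd-wrapped kernels.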
8 | 9 | from .ops import OptiXContext, optix_build_bvh, optix_env_shade, bilateral_denoiser 10 | __all__ = ["OptiXContext", "optix_build_bvh", "optix_env_shade", 'bilateral_denoiser'] 11 | -------------------------------------------------------------------------------- /render/optixutils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /render/optixutils/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /render/optixutils/build/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH 6 | post_cflags = 7 | cuda_cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14 8 | cuda_post_cflags = 9 | cuda_dlink_post_cflags = 10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart 11 | 12 | rule compile 13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 14 | depfile = $out.d 15 | deps = gcc 16 | 17 | rule cuda_compile 18 | depfile = $out.d 19 | deps = gcc 20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 21 | 22 | 23 
| 24 | rule link 25 | command = $cxx $in $ldflags -o $out 26 | 27 | build denoising.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/optixutils/c_src/denoising.cu 28 | build optix_wrapper.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/optix_wrapper.cpp 29 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/torch_bindings.cpp 30 | 31 | 32 | 33 | build optixutils_plugin.so: link denoising.cuda.o optix_wrapper.o torch_bindings.o 34 | 35 | default optixutils_plugin.so 36 | 37 | -------------------------------------------------------------------------------- /render/optixutils/build/denoising.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/denoising.cuda.o -------------------------------------------------------------------------------- /render/optixutils/build/optix_wrapper.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optix_wrapper.o -------------------------------------------------------------------------------- /render/optixutils/build/optixutils_plugin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optixutils_plugin.so -------------------------------------------------------------------------------- /render/optixutils/build/torch_bindings.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/torch_bindings.o -------------------------------------------------------------------------------- /render/optixutils/c_src/common.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | // Helper functions to do broadcast guarded fetches 12 | #if defined(__CUDACC__) 13 | template <typename T, typename U, typename... Args> 14 | static __device__ inline float3 fetch3(const T &tensor, U idx, Args... args) { 15 | return tensor.size(0) == 1 ? fetch3(tensor[0], args...) : fetch3(tensor[idx], args...); 16 | } 17 | template <typename T> static __device__ inline float3 fetch3(const T &tensor) { 18 | return tensor.size(0) == 1 ? make_float3(tensor[0], tensor[0], tensor[0]) : make_float3(tensor[0], tensor[1], tensor[2]); 19 | } 20 | 21 | template <typename T, typename U, typename... Args> 22 | static __device__ inline float2 fetch2(const T &tensor, U idx, Args... args) { 23 | return tensor.size(0) == 1 ? fetch2(tensor[0], args...) : fetch2(tensor[idx], args...); 24 | } 25 | template <typename T> static __device__ inline float2 fetch2(const T &tensor) { 26 | return tensor.size(0) == 1 ?
make_float2(tensor[0], tensor[0]) : make_float2(tensor[0], tensor[1]); 27 | } 28 | 29 | #include "math_utils.h" 30 | #include "bsdf.h" 31 | #endif 32 | 33 | //------------------------------------------------------------------------------ 34 | // CUDA error-checking macros 35 | //------------------------------------------------------------------------------ 36 | 37 | #define CUDA_CHECK( call ) \ 38 | do \ 39 | { \ 40 | cudaError_t error = call; \ 41 | if( error != cudaSuccess ) \ 42 | { \ 43 | std::stringstream ss; \ 44 | ss << "CUDA call (" << #call << " ) failed with error: '" \ 45 | << cudaGetErrorString( error ) \ 46 | << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \ 47 | } \ 48 | } while( 0 ) 49 | 50 | 51 | #define OPTIX_CHECK( call ) \ 52 | do \ 53 | { \ 54 | OptixResult res = call; \ 55 | if( res != OPTIX_SUCCESS ) \ 56 | { \ 57 | std::stringstream ss; \ 58 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \ 59 | << __LINE__ << ")\n"; \ 60 | } \ 61 | } while( 0 ) 62 | 63 | #define OPTIX_CHECK_LOG( call ) \ 64 | do \ 65 | { \ 66 | OptixResult res = call; \ 67 | const size_t sizeof_log_returned = sizeof_log; \ 68 | sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */ \ 69 | if( res != OPTIX_SUCCESS ) \ 70 | { \ 71 | std::stringstream ss; \ 72 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \ 73 | << __LINE__ << ")\nLog:\n" << log \ 74 | << ( sizeof_log_returned > sizeof( log ) ? "<TRUNCATED>" : "" ) \ 75 | << "\n"; \ 76 | } \ 77 | } while( 0 ) 78 | 79 | #define NVRTC_CHECK_ERROR( func ) \ 80 | do \ 81 | { \ 82 | nvrtcResult code = func; \ 83 | if( code != NVRTC_SUCCESS ) \ 84 | throw std::runtime_error( "ERROR: " __FILE__ "(): " + std::string( nvrtcGetErrorString( code ) ) ); \ 85 | } while( 0 ) 86 | -------------------------------------------------------------------------------- /render/optixutils/c_src/denoising.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
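// Overview (a sketch of the math, written to match the kernels below rather than
// any external reference): each tap at offset (fx, fy) from the center pixel is
// weighted by w = w_xy * w_normal * w_depth, where
//   w_xy     = expf(-(fx*fx + fy*fy) / (2 * sigma^2))            // spatial Gaussian
//   w_normal = powf(clamp(dot(n_tap, n_center), eps, 1), 128)    // SVGF-style normal edge-stop
//   w_depth  = expf(-|z_tap - z_center| / max(dz * dist, eps))   // depth edge-stop
// The forward kernel accumulates weighted color in .xyz and the weight sum in .w so
// the Python wrapper can normalize; the backward kernel applies the same weights to
// the incoming output gradient.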
8 | 9 | #include "common.h" 10 | #include "denoising.h" 11 | 12 | #define FLT_EPS 0.0001f 13 | 14 | __global__ void bilateral_denoiser_fwd_kernel(BilateralDenoiserParams params) 15 | { 16 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z); 17 | 18 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2)) 19 | return; 20 | 21 | // Fetch central tap 22 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x); 23 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x); 24 | 25 | float variance = params.sigma * params.sigma; 26 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1; 27 | 28 | float accum_w = 0.0f; 29 | float3 accum_col = make_float3(0.0f); 30 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy) 31 | { 32 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx) 33 | { 34 | // Compute tap coordinates, used for input activations and bilateral guides 35 | int32_t y = idx.y + fy; 36 | int32_t x = idx.x + fx; 37 | 38 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2)) 39 | continue; 40 | 41 | // Fetch current tap 42 | float3 t_col = fetch3(params.col, idx.z, y, x); 43 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x); 44 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x); 45 | 46 | ///////////////////////////////////////////////////////// 47 | // Compute bilateral weight 48 | ///////////////////////////////////////////////////////// 49 | 50 | // Distance 51 | float dist_sqr = fx * fx + fy * fy; 52 | float dist = sqrtf(dist_sqr); 53 | float w_xy = expf(-dist_sqr / (2.0f * variance)); 54 | 55 | // Normal 56 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f); 57 | 58 | // Depth 59 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(c_zdz.y * dist, FLT_EPS))); 60 | 61 | float w = w_xy * w_normal * w_depth; 62 | 63 | accum_col = accum_col + t_col * w; 64 | accum_w += w; 65 | } 66 | } 67 | 68 | params.out[idx.z][idx.y][idx.x][0] = accum_col.x; 69 | params.out[idx.z][idx.y][idx.x][1] = accum_col.y; 70 | params.out[idx.z][idx.y][idx.x][2] = accum_col.z; 71 | params.out[idx.z][idx.y][idx.x][3] = max(accum_w, 0.0001f); 72 | } 73 | 74 | __global__ void bilateral_denoiser_bwd_kernel(BilateralDenoiserParams params) 75 | { 76 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z); 77 | 78 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2)) 79 | return; 80 | 81 | // Fetch central tap 82 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x); 83 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x); 84 | 85 | float variance = params.sigma * params.sigma; 86 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1; 87 | 88 | float3 accum_grad = make_float3(0.0f); 89 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy) 90 | { 91 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx) 92 | { 93 | // Compute tap coordinates, used for input activations and bilateral guides 94 | int32_t y = idx.y + fy; 95 | int32_t x = idx.x + fx; 96 | 97 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2)) 98 | continue; 99 | 100 | // Fetch current tap 101 | float3 t_col = fetch3(params.col, idx.z, y, x); 102 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x); 103 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x); 104 | 105 | ///////////////////////////////////////////////////////// 106 | 
// Compute bilateral weight 107 | ///////////////////////////////////////////////////////// 108 | 109 | // Distance, transposing fx & fy doesn't affect distance 110 | float dist_sqr = fx * fx + fy * fy; 111 | float dist = sqrtf(dist_sqr); 112 | float w_xy = expf(-dist_sqr / (2.0f * variance)); 113 | 114 | // Normal, transpose c_ and t_ (it's symmetric so doesn't matter) 115 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f); 116 | 117 | // Depth, transpose c_ and t_ (matters for the denominator) 118 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(t_zdz.y * dist, FLT_EPS))); 119 | 120 | float w = w_xy * w_normal * w_depth; 121 | 122 | float3 t_col_grad = w * fetch3(params.out_grad, idx.z, y, x); 123 | accum_grad += t_col_grad; 124 | } 125 | } 126 | 127 | params.col_grad[idx.z][idx.y][idx.x][0] = accum_grad.x; 128 | params.col_grad[idx.z][idx.y][idx.x][1] = accum_grad.y; 129 | params.col_grad[idx.z][idx.y][idx.x][2] = accum_grad.z; 130 | } 131 | -------------------------------------------------------------------------------- /render/optixutils/c_src/denoising.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | #include "accessor.h" 11 | 12 | struct BilateralDenoiserParams 13 | { 14 | PackedTensorAccessor32<float, 4> col; // [n, h, w, c], as indexed by the kernels above 15 | PackedTensorAccessor32<float, 4> col_grad; 16 | PackedTensorAccessor32<float, 4> nrm; 17 | PackedTensorAccessor32<float, 4> zdz; 18 | PackedTensorAccessor32<float, 4> out; 19 | PackedTensorAccessor32<float, 4> out_grad; 20 | float sigma; 21 | }; 22 | -------------------------------------------------------------------------------- /render/optixutils/c_src/envsampling/params.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
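// These fields line up one-to-one with the argument list passed from Python in
// render/optixutils/ops.py (env_shade_fwd / env_shade_bwd); the *_grad members
// are only populated in the backward pass.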
8 | 9 | #include "../accessor.h" 10 | // Accessor template arguments follow the tensor shapes passed in from ops.py (gbuffers are NHWC float); the exact layouts of the light-sampling CDFs are assumed here 11 | struct EnvSamplingParams 12 | { 13 | // Ray data 14 | PackedTensorAccessor32<float, 2> ro; // ray origin 15 | 16 | // GBuffer 17 | PackedTensorAccessor32<bool, 3> mask; 18 | PackedTensorAccessor32<float, 4> gb_pos; 19 | PackedTensorAccessor32<float, 4> gb_pos_grad; 20 | PackedTensorAccessor32<float, 4> gb_normal; 21 | PackedTensorAccessor32<float, 4> gb_normal_grad; 22 | PackedTensorAccessor32<float, 4> gb_view_pos; 23 | PackedTensorAccessor32<float, 4> gb_kd; 24 | PackedTensorAccessor32<float, 4> gb_kd_grad; 25 | PackedTensorAccessor32<float, 4> gb_ks; 26 | PackedTensorAccessor32<float, 4> gb_ks_grad; 27 | 28 | // Light 29 | PackedTensorAccessor32<float, 3> light; 30 | PackedTensorAccessor32<float, 3> light_grad; 31 | PackedTensorAccessor32<float, 2> pdf; // light pdf 32 | PackedTensorAccessor32<float, 1> rows; // light sampling cdf (marginal over rows) 33 | PackedTensorAccessor32<float, 2> cols; // light sampling cdf (per-row, over columns) 34 | 35 | // Output 36 | PackedTensorAccessor32<float, 4> diff; 37 | PackedTensorAccessor32<float, 4> diff_grad; 38 | PackedTensorAccessor32<float, 4> spec; 39 | PackedTensorAccessor32<float, 4> spec_grad; 40 | 41 | // Table with random permutations for stratified sampling 42 | PackedTensorAccessor32<int, 2> perms; 43 | 44 | OptixTraversableHandle handle; 45 | unsigned int BSDF; 46 | unsigned int n_samples_x; 47 | unsigned int rnd_seed; 48 | unsigned int backward; 49 | float shadow_scale; 50 | }; -------------------------------------------------------------------------------- /render/optixutils/c_src/optix_wrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include <optix.h> 12 | #include <string> 13 | 14 | //------------------------------------------------------------------------ 15 | // Python OptiX state wrapper. 16 | 17 | struct OptiXState 18 | { 19 | OptixDeviceContext context; 20 | OptixTraversableHandle gas_handle; 21 | CUdeviceptr d_gas_output_buffer; 22 | 23 | // Differentiable env sampling 24 | OptixPipeline pipelineEnvSampling; 25 | OptixShaderBindingTable sbtEnvSampling; 26 | OptixModule moduleEnvSampling; 27 | }; 28 | 29 | 30 | class OptiXStateWrapper 31 | { 32 | public: 33 | OptiXStateWrapper (const std::string &path, const std::string &cuda_path); 34 | ~OptiXStateWrapper (void); 35 | 36 | OptiXState* pState; 37 | }; 38 | 39 | -------------------------------------------------------------------------------- /render/optixutils/include/optix.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited.
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /// @file 23 | /// @author NVIDIA Corporation 24 | /// @brief OptiX public API header 25 | /// 26 | /// Includes the host api if compiling host code, includes the cuda api if compiling device code. 27 | /// For the math library routines include optix_math.h 28 | 29 | #ifndef __optix_optix_h__ 30 | #define __optix_optix_h__ 31 | 32 | /// The OptiX version. 33 | /// 34 | /// - major = OPTIX_VERSION/10000 35 | /// - minor = (OPTIX_VERSION%10000)/100 36 | /// - micro = OPTIX_VERSION%100 37 | #define OPTIX_VERSION 70300 38 | 39 | 40 | #ifdef __CUDACC__ 41 | #include "optix_device.h" 42 | #else 43 | #include "optix_host.h" 44 | #endif 45 | 46 | 47 | #endif // __optix_optix_h__ 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_device.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_device.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host/Device side 28 | */ 29 | 30 | /******************************************************************************\ 31 | * optix_cuda.h 32 | * 33 | * This file provides the nvcc interface for generating PTX that the OptiX is 34 | * capable of parsing and weaving into the final kernel. This is included by 35 | * optix.h automatically if compiling device code. It can be included explicitly 36 | * in host code if desired. 
37 | * 38 | \******************************************************************************/ 39 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 40 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 41 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 42 | #endif 43 | #include "optix_7_device.h" 44 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) 45 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 46 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 47 | #endif 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_function_table_definition.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /// @file 22 | /// @author NVIDIA Corporation 23 | /// @brief OptiX public API header 24 | 25 | #ifndef __optix_optix_function_table_definition_h__ 26 | #define __optix_optix_function_table_definition_h__ 27 | 28 | #include "optix_function_table.h" 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | /** \addtogroup optix_function_table 35 | @{ 36 | */ 37 | 38 | /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly 39 | /// one translation unit. This can be achieved by including this header file in that translation 40 | /// unit. 41 | OptixFunctionTable g_optixFunctionTable; 42 | 43 | /*@}*/ // end group optix_function_table 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | 49 | #endif // __optix_optix_function_table_definition_h__ 50 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_host.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_host.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host side 28 | */ 29 | 30 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 31 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 32 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 33 | #endif 34 | #include "optix_7_host.h" 35 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) 36 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 37 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 38 | #endif 39 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /** 22 | * @file optix_types.h 23 | * @author NVIDIA Corporation 24 | * @brief OptiX public API header 25 | * 26 | */ 27 | 28 | #ifndef __optix_optix_types_h__ 29 | #define __optix_optix_types_h__ 30 | 31 | // clang-format off 32 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 33 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 34 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 35 | #endif 36 | #include "optix_7_types.h" 37 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ ) 38 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 39 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 40 | #endif 41 | // clang-format on 42 | 43 | #endif // #ifndef __optix_optix_types_h__ 44 | -------------------------------------------------------------------------------- /render/optixutils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import os 11 | import sys 12 | import torch 13 | import torch.utils.cpp_extension 14 | 15 | #---------------------------------------------------------------------------- 16 | # C++/Cuda plugin compiler/loader. 17 | 18 | _plugin = None 19 | if _plugin is None: 20 | 21 | # Make sure we can find the necessary compiler and library binaries. 22 | if os.name == 'nt': 23 | optix_include_dir = os.path.dirname(__file__) + r"\include" 24 | 25 | def find_cl_path(): 26 | import glob 27 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']: 28 | vs_editions = glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) \ 29 | + glob.glob(r"C:\Program Files\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) 30 | paths = sorted(vs_editions, reverse=True) 31 | if paths: 32 | return paths[0] 33 | 34 | # If cl.exe is not on path, try to find it. 35 | if os.system("where cl.exe >nul 2>nul") != 0: 36 | cl_path = find_cl_path() 37 | if cl_path is None: 38 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 39 | os.environ['PATH'] += ';' + cl_path 40 | 41 | elif os.name == 'posix': 42 | optix_include_dir = os.path.dirname(__file__) + r"/include" 43 | 44 | include_paths = [optix_include_dir] 45 | 46 | # Compiler options. 47 | opts = ['-DNVDR_TORCH'] 48 | 49 | # Linker options. 50 | if os.name == 'posix': 51 | ldflags = ['-lcuda', '-lnvrtc'] 52 | elif os.name == 'nt': 53 | ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib'] 54 | 55 | # List of sources.
56 | source_files = [ 57 | 'c_src/denoising.cu', 58 | 'c_src/optix_wrapper.cpp', 59 | 'c_src/torch_bindings.cpp' 60 | ] 61 | 62 | # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine. 63 | os.environ['TORCH_CUDA_ARCH_LIST'] = '' 64 | 65 | # Compile and load. 66 | build_dir = os.path.join(os.path.dirname(__file__), 'build') 67 | os.makedirs(build_dir, exist_ok=True) 68 | source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files] 69 | torch.utils.cpp_extension.load(name='optixutils_plugin', sources=source_paths, extra_cflags=opts, 70 | build_directory=build_dir, 71 | extra_cuda_cflags=opts, extra_ldflags=ldflags, extra_include_paths=include_paths, with_cuda=True, verbose=True) 72 | 73 | # Import, cache, and return the compiled module. 74 | import optixutils_plugin 75 | _plugin = optixutils_plugin 76 | 77 | #---------------------------------------------------------------------------- 78 | # OptiX autograd func 79 | #---------------------------------------------------------------------------- 80 | 81 | class _optix_env_shade_func(torch.autograd.Function): 82 | _random_perm = {} 83 | 84 | @staticmethod 85 | def forward(ctx, optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF, n_samples_x, rnd_seed, shadow_scale): 86 | _rnd_seed = np.random.randint(2**31) if rnd_seed is None else rnd_seed 87 | if n_samples_x not in _optix_env_shade_func._random_perm: 88 | # Generate (32k) tables with random permutations to decorrelate the BSDF and light stratified samples 89 | _optix_env_shade_func._random_perm[n_samples_x] = torch.argsort(torch.rand(32768, n_samples_x * n_samples_x, device="cuda"), dim=-1).int() 90 | 91 | diff, spec = _plugin.env_shade_fwd(optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[n_samples_x], BSDF, n_samples_x, _rnd_seed, shadow_scale) 92 | ctx.save_for_backward(mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols) 93 | ctx.optix_ctx = optix_ctx 94 | ctx.BSDF = BSDF 95 | ctx.n_samples_x = n_samples_x 96 | ctx.rnd_seed = rnd_seed 97 | ctx.shadow_scale = shadow_scale 98 | return diff, spec 99 | 100 | @staticmethod 101 | def backward(ctx, diff_grad, spec_grad): 102 | optix_ctx = ctx.optix_ctx 103 | _rnd_seed = np.random.randint(2**31) if ctx.rnd_seed is None else ctx.rnd_seed 104 | mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols = ctx.saved_variables 105 | gb_pos_grad, gb_normal_grad, gb_kd_grad, gb_ks_grad, light_grad = _plugin.env_shade_bwd( 106 | optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[ctx.n_samples_x], 107 | ctx.BSDF, ctx.n_samples_x, _rnd_seed, ctx.shadow_scale, diff_grad, spec_grad) 108 | return None, None, None, gb_pos_grad, gb_normal_grad, None, gb_kd_grad, gb_ks_grad, light_grad, None, None, None, None, None, None, None 109 | 110 | class _bilateral_denoiser_func(torch.autograd.Function): 111 | @staticmethod 112 | def forward(ctx, col, nrm, zdz, sigma): 113 | ctx.save_for_backward(col, nrm, zdz) 114 | ctx.sigma = sigma 115 | out = _plugin.bilateral_denoiser_fwd(col, nrm, zdz, sigma) 116 | return out 117 | 118 | @staticmethod 119 | def backward(ctx, out_grad): 120 | col, nrm, zdz = ctx.saved_variables 121 | col_grad = _plugin.bilateral_denoiser_bwd(col, nrm, zdz, ctx.sigma, out_grad) 122 | return
col_grad, None, None, None 123 | 124 | #---------------------------------------------------------------------------- 125 | # OptiX ray tracing utils 126 | #---------------------------------------------------------------------------- 127 | 128 | class OptiXContext: 129 | def __init__(self): 130 | print("Cuda path", torch.utils.cpp_extension.CUDA_HOME) 131 | self.cpp_wrapper = _plugin.OptiXStateWrapper(os.path.dirname(__file__), torch.utils.cpp_extension.CUDA_HOME) 132 | 133 | def optix_build_bvh(optix_ctx, verts, tris, rebuild): 134 | ''' 135 | We deliberately do not raise on an empty triangle mesh here, since mSDF supervision can produce one; this should be cleaned up later. 136 | ''' 137 | # assert tris.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)" 138 | # assert verts.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)" 139 | _plugin.optix_build_bvh(optix_ctx.cpp_wrapper, verts.view(-1, 3), tris.view(-1, 3), rebuild) 140 | 141 | def optix_env_shade(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF='pbr', n_samples_x=8, rnd_seed=None, shadow_scale=1.0): 142 | iBSDF = ['pbr', 'diffuse', 'white'].index(BSDF) # Ordering important, must match the order of the fwd/bwdPbrBSDF kernel. 143 | return _optix_env_shade_func.apply(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, iBSDF, n_samples_x, rnd_seed, shadow_scale) 144 | 145 | def bilateral_denoiser(col, nrm, zdz, sigma): 146 | col_w = _bilateral_denoiser_func.apply(col, nrm, zdz, sigma) 147 | return col_w[..., 0:3] / col_w[..., 3:4] 148 | -------------------------------------------------------------------------------- /render/optixutils/tests/filter_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | 11 | 12 | import os 13 | import sys 14 | import math 15 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 16 | import optixutils as ou 17 | import numpy as np 18 | 19 | RES = 1024 20 | DTYPE = torch.float32 21 | 22 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 23 | return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN 24 | 25 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 26 | return x / length(x, eps) 27 | 28 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 29 | return torch.sum(x*y, -1, keepdim=True) 30 | 31 | class BilateralDenoiser(torch.nn.Module): 32 | def __init__(self, sigma=1.0): 33 | super(BilateralDenoiser, self).__init__() 34 | self.set_sigma(sigma) 35 | 36 | def set_sigma(self, sigma): 37 | self.sigma = max(sigma, 0.0001) 38 | self.variance = self.sigma**2.
39 | self.N = 2 * math.ceil(self.sigma * 2.5) + 1 40 | 41 | def forward(self, input): 42 | eps = 0.0001 43 | col = input[..., 0:3] 44 | nrm = input[..., 3:6] 45 | kd = input[..., 6:9] 46 | zdz = input[..., 9:11] 47 | 48 | accum_col = torch.zeros_like(col) 49 | accum_w = torch.zeros_like(col[..., 0:1]) 50 | for y in range(-self.N, self.N+1): 51 | for x in range(-self.N, self.N+1): 52 | 53 | ty, tx = torch.meshgrid(torch.arange(0, input.shape[1], dtype=torch.float32, device="cuda"), torch.arange(0, input.shape[2], dtype=torch.float32, device="cuda")) 54 | tx = tx[None, ..., None] + x 55 | ty = ty[None, ..., None] + y 56 | 57 | dist_sqr = (x**2 + y**2) 58 | dist = np.sqrt(dist_sqr) 59 | w_xy = np.exp(-dist_sqr / (2 * self.variance)) 60 | 61 | with torch.no_grad(): 62 | nrm_tap = torch.roll(nrm, (-y, -x), (1, 2)) 63 | w_normal = torch.pow(torch.clamp(dot(nrm_tap, nrm), min=eps, max=1.0), 128.0) # From SVGF 64 | 65 | zdz_tap = torch.roll(zdz, (-y, -x), (1, 2)) 66 | w_depth = torch.exp(-(torch.abs(zdz_tap[..., 0:1] - zdz[..., 0:1]) / torch.clamp(zdz[..., 1:2] * dist, min=eps)) ) # From SVGF 67 | 68 | w = w_xy * w_normal * w_depth 69 | w = torch.where((tx >= 0) & (tx < input.shape[2]) & (ty >= 0) & (ty < input.shape[1]), w, torch.zeros_like(w)) 70 | 71 | col_tap = torch.roll(col, (-y, -x), (1, 2)) 72 | accum_col += col_tap * w 73 | accum_w += w 74 | return accum_col / torch.clamp(accum_w, min=eps) 75 | 76 | def relative_loss(name, ref, cuda): 77 | ref = ref.float() 78 | cuda = cuda.float() 79 | denom = torch.where(ref > 1e-7, ref, torch.ones_like(ref)) 80 | relative = torch.abs(ref - cuda) / denom 81 | print(name, torch.max(relative).item()) 82 | 83 | 84 | def test_filter(): 85 | img_cuda = torch.rand(1, RES, RES, 11, dtype=DTYPE, device='cuda') 86 | img_cuda[..., 3:6] = safe_normalize(img_cuda[..., 3:6]) 87 | img_ref = img_cuda.clone().detach().requires_grad_(True) 88 | img_cuda = img_cuda.clone().detach().requires_grad_(True) 89 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 90 | target_ref = target_cuda.clone().detach().requires_grad_(True) 91 | 92 | SIGMA = 2.0 93 | 94 | start = torch.cuda.Event(enable_timing=True) 95 | end = torch.cuda.Event(enable_timing=True) 96 | 97 | start.record() 98 | denoiser = BilateralDenoiser(sigma=SIGMA) 99 | denoised_ref = denoiser.forward(img_ref) 100 | ref_loss = torch.nn.MSELoss()(denoised_ref, target_ref) 101 | ref_loss.backward() 102 | end.record() 103 | torch.cuda.synchronize() 104 | print("Python:", start.elapsed_time(end)) 105 | 106 | start.record() 107 | denoised_cuda = ou.bilateral_denoiser(img_cuda[..., 0:3], img_cuda[..., 3:6], img_cuda[..., 9:11], SIGMA) # the exported CUDA path takes (col, nrm, zdz, sigma); the kd channels (6:9) are not used as a guide 108 | cuda_loss = torch.nn.MSELoss()(denoised_cuda, target_cuda) 109 | cuda_loss.backward() 110 | end.record() 111 | torch.cuda.synchronize() 112 | print("CUDA:", start.elapsed_time(end)) 113 | 114 | print("-------------------------------------------------------------") 115 | print(" Filter loss:") 116 | print("-------------------------------------------------------------") 117 | 118 | relative_loss("denoised:", denoised_ref[..., 0:3], denoised_cuda[..., 0:3]) 119 | relative_loss("grad:", img_ref.grad[..., 0:3], img_cuda.grad[..., 0:3]) 120 | 121 | test_filter() -------------------------------------------------------------------------------- /render/regularizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import torch 11 | import nvdiffrast.torch as dr 12 | import torch.nn.functional as F 13 | 14 | from render import util 15 | from . import mesh 16 | 17 | def luma(x): 18 | return ((x[..., 0:1] + x[..., 1:2] + x[..., 2:3]) / 3).repeat(1, 1, 1, 3) 19 | def value(x): 20 | return torch.max(x[..., 0:3], dim=-1, keepdim=True)[0].repeat(1, 1, 1, 3) 21 | 22 | def chroma_loss(kd, color_ref, lambda_chroma): 23 | eps = 0.001 24 | ref_chroma = color_ref[..., 0:3] / torch.clip(value(color_ref), min=eps) 25 | opt_chroma = kd[..., 0:3] / torch.clip(value(kd), min=eps) 26 | return torch.mean(torch.abs((opt_chroma - ref_chroma) * color_ref[..., 3:])) * lambda_chroma 27 | 28 | # Diffuse luma regularizer + specular 29 | def shading_loss(diffuse_light, specular_light, color_ref, lambda_diffuse, lambda_specular): 30 | diffuse_luma = luma(diffuse_light) 31 | specular_luma = luma(specular_light) 32 | ref_luma = value(color_ref) 33 | 34 | eps = 0.001 35 | img = util.rgb_to_srgb(torch.log(torch.clamp((diffuse_luma + specular_luma) * color_ref[..., 3:], min=0, max=65535) + 1)) 36 | target = util.rgb_to_srgb(torch.log(torch.clamp(ref_luma * color_ref[..., 3:], min=0, max=65535) + 1)) 37 | # error = torch.abs(img - target) * diffuse_luma / torch.clamp(diffuse_luma + specular_luma, min=eps) ### encourage specular component to take control 38 | error = torch.abs(img - target) ### the original version in the paper 39 | loss = torch.mean(error) * lambda_diffuse 40 | loss += torch.mean(specular_luma) / torch.clamp(torch.mean(diffuse_luma), min=eps) * lambda_specular 41 | return loss 42 | 43 | ###################################################################################### 44 | # Material smoothness loss 45 | ###################################################################################### 46 | 47 | def material_smoothness_grad(kd_grad, ks_grad, nrm_grad, lambda_kd=0.25, lambda_ks=0.1, lambda_nrm=0.0): 48 | kd_luma_grad = (kd_grad[..., 0] + kd_grad[..., 1] + kd_grad[..., 2]) / 3 49 | loss = torch.mean(kd_luma_grad * kd_grad[..., -1]) * lambda_kd 50 | loss += torch.mean(ks_grad[..., :-1] * ks_grad[..., -1:]) * lambda_ks 51 | loss += torch.mean(nrm_grad[..., :-1] * nrm_grad[..., -1:]) * lambda_nrm 52 | return loss 53 | 54 | ###################################################################################### 55 | # Computes the image gradient, useful for kd/ks smoothness losses 56 | ###################################################################################### 57 | def image_grad(buf, std=0.01): 58 | t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / buf.shape[1], 1.0 - 1.0 / buf.shape[1], buf.shape[1], device="cuda"), 59 | torch.linspace(-1.0 + 1.0 / buf.shape[2], 1.0 - 1.0 / buf.shape[2], buf.shape[2], device="cuda"), 60 | indexing='ij') 61 | tc = torch.normal(mean=0, std=std, size=(buf.shape[0], buf.shape[1], buf.shape[2], 2), device="cuda") + torch.stack((s, t), dim=-1)[None, ...] 
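    # (sketch of the idea, matching the surrounding code) `tc` jitters every pixel's
    # sampling coordinate by N(0, std); re-sampling the buffer at the jittered location
    # and differencing against the original gives a stochastic estimate of the local
    # image gradient, masked by both alpha channels.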
62 | tap = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp') 63 | return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:] 64 | 65 | ###################################################################################### 66 | # Computes the average edge length of a mesh. 67 | # Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients 68 | ###################################################################################### 69 | def avg_edge_length(v_pos, t_pos_idx): 70 | e_pos_idx = mesh.compute_edges(t_pos_idx) 71 | edge_len = util.length(v_pos[e_pos_idx[:, 0]] - v_pos[e_pos_idx[:, 1]]) 72 | return torch.mean(edge_len) 73 | 74 | ###################################################################################### 75 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun). 76 | # https://mgarland.org/class/geom04/material/smoothing.pdf 77 | ###################################################################################### 78 | def laplace_regularizer_const(v_pos, t_pos_idx): 79 | term = torch.zeros_like(v_pos) 80 | norm = torch.zeros_like(v_pos[..., 0:1]) 81 | 82 | v0 = v_pos[t_pos_idx[:, 0], :] 83 | v1 = v_pos[t_pos_idx[:, 1], :] 84 | v2 = v_pos[t_pos_idx[:, 2], :] 85 | 86 | term.scatter_add_(0, t_pos_idx[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0)) 87 | term.scatter_add_(0, t_pos_idx[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1)) 88 | term.scatter_add_(0, t_pos_idx[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2)) 89 | 90 | two = torch.ones_like(v0) * 2.0 91 | norm.scatter_add_(0, t_pos_idx[:, 0:1], two) 92 | norm.scatter_add_(0, t_pos_idx[:, 1:2], two) 93 | norm.scatter_add_(0, t_pos_idx[:, 2:3], two) 94 | 95 | term = term / torch.clamp(norm, min=1.0) 96 | 97 | return torch.mean(term**2) 98 | 99 | ###################################################################################### 100 | # Smooth vertex normals 101 | ###################################################################################### 102 | def normal_consistency(v_pos, t_pos_idx): 103 | # Compute face normals 104 | v0 = v_pos[t_pos_idx[:, 0], :] 105 | v1 = v_pos[t_pos_idx[:, 1], :] 106 | v2 = v_pos[t_pos_idx[:, 2], :] 107 | 108 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0)) 109 | 110 | tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx) 111 | 112 | # Fetch normals for both faces sharing an edge 113 | n0 = face_normals[tris_per_edge[:, 0], :] 114 | n1 = face_normals[tris_per_edge[:, 1], :] 115 | 116 | # Compute error metric based on normal difference 117 | term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0) 118 | term = (1.0 - term) * 0.5 119 | 120 | return torch.mean(torch.abs(term)) 121 | 122 | def ssim_loss(pred, target): 123 | # Window size for the local Gaussian statistics 124 | window_size = 11 125 | # Normalized 2D Gaussian window, one copy per RGB channel (sigma=1.5 assumed, the common SSIM default) 126 | coords = torch.arange(window_size, dtype=pred.dtype, device=pred.device) - window_size // 2 127 | g = torch.exp(-coords**2 / (2 * 1.5**2)) 128 | g = g / g.sum() 129 | window = (g[:, None] * g[None, :]).expand(3, 1, window_size, window_size).contiguous() 130 | 131 | # Local means 132 | mu_x = F.conv2d(pred, window, padding=window_size//2, groups=3) 133 | mu_y = F.conv2d(target, window, padding=window_size//2, groups=3) 134 | 135 | # Local variances and covariance 136 | sigma_x = F.conv2d(pred**2, window, padding=window_size//2, groups=3) - mu_x**2 137 | sigma_y = F.conv2d(target**2, window, padding=window_size//2, groups=3) - mu_y**2 138 | sigma_xy = F.conv2d(pred*target, window, padding=window_size//2, groups=3) - mu_x*mu_y 139 | 140 | # Constants for numerical stability 141 | C1 = 0.01**2 142 | C2 = 0.03**2 143 | 144 | # SSIM map 145 | ssim = ((2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)) / ((mu_x**2 + mu_y**2 + C1) * (sigma_x + sigma_y + C2)) 146 | 147 | return 1 - ssim.mean() --------------------------------------------------------------------------------
/render/renderutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith 11 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ] 12 | -------------------------------------------------------------------------------- /render/renderutils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/bsdf.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/bsdf.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
9 | 10 | import math 11 | import torch 12 | 13 | NORMAL_THRESHOLD = 0.1 14 | 15 | ################################################################################ 16 | # Vector utility functions 17 | ################################################################################ 18 | 19 | def _dot(x, y): 20 | return torch.sum(x*y, -1, keepdim=True) 21 | 22 | def _reflect(x, n): 23 | return 2*_dot(x, n)*n - x 24 | 25 | def _safe_normalize(x): 26 | return torch.nn.functional.normalize(x, dim = -1) 27 | 28 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading): 29 | # Swap normal direction for backfacing surfaces 30 | if two_sided_shading: 31 | smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm) 32 | geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm) 33 | 34 | t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1) 35 | return torch.lerp(geom_nrm, smooth_nrm, t) 36 | 37 | 38 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl): 39 | smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm)) 40 | if opengl: 41 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 42 | else: 43 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 44 | return _safe_normalize(shading_nrm) 45 | 46 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 47 | smooth_nrm = _safe_normalize(smooth_nrm) 48 | smooth_tng = _safe_normalize(smooth_tng) 49 | view_vec = _safe_normalize(view_pos - pos) 50 | shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl) 51 | return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading) 52 | 53 | ################################################################################ 54 | # Simple lambertian diffuse BSDF 55 | ################################################################################ 56 | 57 | def bsdf_lambert(nrm, wi): 58 | return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi 59 | 60 | ################################################################################ 61 | # Frostbite diffuse 62 | ################################################################################ 63 | 64 | def bsdf_frostbite(nrm, wi, wo, linearRoughness): 65 | wiDotN = _dot(wi, nrm) 66 | woDotN = _dot(wo, nrm) 67 | 68 | h = _safe_normalize(wo + wi) 69 | wiDotH = _dot(wi, h) 70 | 71 | energyBias = 0.5 * linearRoughness 72 | energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness 73 | f90 = energyBias + 2.0 * wiDotH * wiDotH * linearRoughness 74 | f0 = 1.0 75 | 76 | wiScatter = bsdf_fresnel_shlick(f0, f90, wiDotN) 77 | woScatter = bsdf_fresnel_shlick(f0, f90, woDotN) 78 | res = wiScatter * woScatter * energyFactor 79 | return torch.where((wiDotN > 0.0) & (woDotN > 0.0), res, torch.zeros_like(res)) 80 | 81 | ################################################################################ 82 | # Phong specular, loosely based on mitsuba implementation 83 | ################################################################################ 84 | 85 | def bsdf_phong(nrm, wo, wi, N): 86 | dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0) 87 | dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0) 88 | return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi) 89 | 90 | 
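# Worked example (a sketch): at normal incidence, with n = wi = wo = (0, 0, 1),
#   bsdf_lambert(n, n)        -> 1/pi  (~0.3183)
#   bsdf_phong(n, n, n, 32.0) -> (32 + 2) / (2*pi)  (~5.411),
# since the reflection of wo about n coincides with wi, so dp_r = dp_l = 1.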
################################################################################ 91 | # PBR's implementation of GGX specular 92 | ################################################################################ 93 | 94 | specular_epsilon = 1e-4 95 | 96 | def bsdf_fresnel_shlick(f0, f90, cosTheta): 97 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 98 | return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0 99 | 100 | def bsdf_ndf_ggx(alphaSqr, cosTheta): 101 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 102 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1 103 | return alphaSqr / (d * d * math.pi) 104 | 105 | def bsdf_lambda_ggx(alphaSqr, cosTheta): 106 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 107 | cosThetaSqr = _cosTheta * _cosTheta 108 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr 109 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0) 110 | return res 111 | 112 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO): 113 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI) 114 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO) 115 | return 1 / (1 + lambdaI + lambdaO) 116 | 117 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08): 118 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0) 119 | alphaSqr = _alpha * _alpha 120 | 121 | h = _safe_normalize(wo + wi) 122 | woDotN = _dot(wo, nrm) 123 | wiDotN = _dot(wi, nrm) 124 | woDotH = _dot(wo, h) 125 | nDotH = _dot(nrm, h) 126 | 127 | D = bsdf_ndf_ggx(alphaSqr, nDotH) 128 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN) 129 | F = bsdf_fresnel_shlick(col, 1, woDotH) 130 | 131 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon) 132 | 133 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon) 134 | return torch.where(frontfacing, w, torch.zeros_like(w)) 135 | 136 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF): 137 | wo = _safe_normalize(view_pos - pos) 138 | wi = _safe_normalize(light_pos - pos) 139 | 140 | spec_str = arm[..., 0:1] # x component 141 | roughness = arm[..., 1:2] # y component 142 | metallic = arm[..., 2:3] # z component 143 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str) 144 | kd = kd * (1.0 - metallic) 145 | 146 | if BSDF == 0: 147 | diffuse = kd * bsdf_lambert(nrm, wi) 148 | else: 149 | diffuse = kd * bsdf_frostbite(nrm, wi, wo, roughness) 150 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness) 151 | return diffuse + specular 152 | -------------------------------------------------------------------------------- /render/renderutils/build/bsdf.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/bsdf.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem 
/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH 6 | post_cflags = 7 | cuda_cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14 8 | cuda_post_cflags = 9 | cuda_dlink_post_cflags = 10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart 11 | 12 | rule compile 13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 14 | depfile = $out.d 15 | deps = gcc 16 | 17 | rule cuda_compile 18 | depfile = $out.d 19 | deps = gcc 20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 21 | 22 | 23 | 24 | rule link 25 | command = $cxx $in $ldflags -o $out 26 | 27 | build mesh.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/mesh.cu 28 | build loss.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/loss.cu 29 | build bsdf.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/bsdf.cu 30 | build normal.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/normal.cu 31 | build cubemap.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/cubemap.cu 32 | build common.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/common.cpp 33 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/torch_bindings.cpp 34 | 35 | 36 | 37 | build renderutils_plugin.so: link mesh.cuda.o loss.cuda.o bsdf.cuda.o normal.cuda.o cubemap.cuda.o common.o torch_bindings.o 38 | 39 | default renderutils_plugin.so 40 | 41 | -------------------------------------------------------------------------------- /render/renderutils/build/common.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/common.o -------------------------------------------------------------------------------- /render/renderutils/build/cubemap.cuda.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/cubemap.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/loss.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/loss.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/mesh.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/mesh.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/normal.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/normal.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/renderutils_plugin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/renderutils_plugin.so -------------------------------------------------------------------------------- /render/renderutils/build/torch_bindings.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/torch_bindings.o -------------------------------------------------------------------------------- /render/renderutils/c_src/bsdf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct LambertKernelParams 17 | { 18 | Tensor nrm; 19 | Tensor wi; 20 | Tensor out; 21 | dim3 gridSize; 22 | }; 23 | 24 | struct FrostbiteDiffuseKernelParams 25 | { 26 | Tensor nrm; 27 | Tensor wi; 28 | Tensor wo; 29 | Tensor linearRoughness; 30 | Tensor out; 31 | dim3 gridSize; 32 | }; 33 | 34 | struct FresnelShlickKernelParams 35 | { 36 | Tensor f0; 37 | Tensor f90; 38 | Tensor cosTheta; 39 | Tensor out; 40 | dim3 gridSize; 41 | }; 42 | 43 | struct NdfGGXParams 44 | { 45 | Tensor alphaSqr; 46 | Tensor cosTheta; 47 | Tensor out; 48 | dim3 gridSize; 49 | }; 50 | 51 | struct MaskingSmithParams 52 | { 53 | Tensor alphaSqr; 54 | Tensor cosThetaI; 55 | Tensor cosThetaO; 56 | Tensor out; 57 | dim3 gridSize; 58 | }; 59 | 60 | struct PbrSpecular 61 | { 62 | Tensor col; 63 | Tensor nrm; 64 | Tensor wo; 65 | Tensor wi; 66 | Tensor alpha; 67 | Tensor out; 68 | dim3 gridSize; 69 | float min_roughness; 70 | }; 71 | 72 | struct PbrBSDF 73 | { 74 | Tensor kd; 75 | Tensor arm; 76 | Tensor pos; 77 | Tensor nrm; 78 | Tensor view_pos; 79 | Tensor light_pos; 80 | Tensor out; 81 | dim3 gridSize; 82 | float min_roughness; 83 | int BSDF; 84 | }; 85 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #include <cuda_runtime.h> 13 | #include <algorithm> 14 | 15 | //------------------------------------------------------------------------ 16 | // Block and grid size calculators for kernel launches. 17 | 18 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims) 19 | { 20 | int maxThreads = maxWidth * maxHeight; 21 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1) 22 | return dim3(1, 1, 1); // Degenerate. 23 | 24 | // Start from max size. 25 | int bw = maxWidth; 26 | int bh = maxHeight; 27 | 28 | // Optimizations for weirdly sized buffers. 29 | if (dims.x < bw) 30 | { 31 | // Decrease block width to smallest power of two that covers the buffer width. 32 | while ((bw >> 1) >= dims.x) 33 | bw >>= 1; 34 | 35 | // Maximize height. 36 | bh = maxThreads / bw; 37 | if (bh > dims.y) 38 | bh = dims.y; 39 | } 40 | else if (dims.y < bh) 41 | { 42 | // Halve height and double width until fits completely inside buffer vertically. 43 | while (bh > dims.y) 44 | { 45 | bh >>= 1; 46 | if (bw < dims.x) 47 | bw <<= 1; 48 | } 49 | } 50 | 51 | // Done. 52 | return dim3(bw, bh, 1); 53 | } 54 | 55 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g.
__shfl_xor_sync) 56 | dim3 getWarpSize(dim3 blockSize) 57 | { 58 | return dim3( 59 | std::min(blockSize.x, 32u), 60 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)), 61 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z)) 62 | ); 63 | } 64 | 65 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims) 66 | { 67 | dim3 gridSize; 68 | gridSize.x = (dims.x - 1) / blockSize.x + 1; 69 | gridSize.y = (dims.y - 1) / blockSize.y + 1; 70 | gridSize.z = (dims.z - 1) / blockSize.z + 1; 71 | return gridSize; 72 | } 73 | 74 | //------------------------------------------------------------------------ 75 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | #include <cuda.h> 14 | #include <stdint.h> 15 | 16 | #include "vec3f.h" 17 | #include "vec4f.h" 18 | #include "tensor.h" 19 | 20 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims); 21 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims); 22 | 23 | #ifdef __CUDACC__ 24 | 25 | #ifdef _MSC_VER 26 | #define M_PI 3.14159265358979323846f 27 | #endif 28 | 29 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize) 30 | { 31 | return dim3( 32 | min(blockSize.x, 32u), 33 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)), 34 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z)) 35 | ); 36 | } 37 | 38 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); } 39 | #else 40 | dim3 getWarpSize(dim3 blockSize); 41 | #endif -------------------------------------------------------------------------------- /render/renderutils/c_src/cubemap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited.
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct DiffuseCubemapKernelParams 17 | { 18 | Tensor cubemap; 19 | Tensor out; 20 | dim3 gridSize; 21 | }; 22 | 23 | struct SpecularCubemapKernelParams 24 | { 25 | Tensor cubemap; 26 | Tensor bounds; 27 | Tensor out; 28 | dim3 gridSize; 29 | float costheta_cutoff; 30 | float roughness; 31 | }; 32 | 33 | struct SpecularBoundsKernelParams 34 | { 35 | float costheta_cutoff; 36 | Tensor out; 37 | dim3 gridSize; 38 | }; 39 | -------------------------------------------------------------------------------- /render/renderutils/c_src/loss.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | enum TonemapperType 17 | { 18 | TONEMAPPER_NONE = 0, 19 | TONEMAPPER_LOG_SRGB = 1 20 | }; 21 | 22 | enum LossType 23 | { 24 | LOSS_L1 = 0, 25 | LOSS_MSE = 1, 26 | LOSS_RELMSE = 2, 27 | LOSS_SMAPE = 3 28 | }; 29 | 30 | struct LossKernelParams 31 | { 32 | Tensor img; 33 | Tensor target; 34 | Tensor out; 35 | dim3 gridSize; 36 | TonemapperType tonemapper; 37 | LossType loss; 38 | }; 39 | -------------------------------------------------------------------------------- /render/renderutils/c_src/mesh.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #include <cuda.h> 13 | #include <stdint.h> 14 | 15 | #include "common.h" 16 | #include "mesh.h" 17 | 18 | 19 | //------------------------------------------------------------------------ 20 | // Kernels 21 | 22 | __global__ void xfmPointsFwdKernel(XfmKernelParams p) 23 | { 24 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 25 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z; 26 | 27 | __shared__ float mtx[4][4]; 28 | if (threadIdx.x < 16) 29 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0)); 30 | __syncthreads(); 31 | 32 | if (px >= p.gridSize.x) 33 | return; 34 | 35 | vec3f pos( 36 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)), 37 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)), 38 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0)) 39 | ); 40 | 41 | if (p.isPoints) 42 | { 43 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]); 44 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]); 45 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]); 46 | p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]); 47 | } 48 | else 49 | { 50 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]); 51 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]); 52 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]); 53 | } 54 | } 55 | 56 | __global__ void xfmPointsBwdKernel(XfmKernelParams p) 57 | { 58 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 59 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z; 60 | 61 | __shared__ float mtx[4][4]; 62 | if (threadIdx.x < 16) 63 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0)); 64 | __syncthreads(); 65 | 66 | if (px >= p.gridSize.x) 67 | return; 68 | 69 | vec3f pos( 70 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)), 71 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)), 72 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0)) 73 | ); 74 | 75 | vec4f d_out( 76 | p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)), 77 | p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)), 78 | p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)), 79 | p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0)) 80 | ); 81 | 82 | if (p.isPoints) 83 | { 84 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]); 85 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]); 86 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]); 87 | } 88 | else 89 | { 90 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]); 91 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]); 92 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]); 93 | } 94 | }
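//------------------------------------------------------------------------
// Note on conventions: the shared-memory load above stores the transposed
// matrix, so the forward kernel computes out = matrix @ [x, y, z, 1] per
// point, and drops the translation terms for vectors. A minimal PyTorch
// sketch of the same transform (illustrative only; assumes points of
// shape [B, N, 3] and matrix of shape [B, 4, 4]):
//
//   out = torch.matmul(torch.nn.functional.pad(points, (0, 1), value=1.0),
//                      matrix.transpose(-2, -1))     # points  -> [B, N, 4]
//   out = torch.matmul(vectors, matrix[..., :3, :3].transpose(-2, -1))
//                                                    # vectors -> [B, N, 3]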
-------------------------------------------------------------------------------- /render/renderutils/c_src/mesh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct XfmKernelParams 17 | { 18 | bool isPoints; 19 | Tensor points; 20 | Tensor matrix; 21 | Tensor out; 22 | dim3 gridSize; 23 | }; 24 | -------------------------------------------------------------------------------- /render/renderutils/c_src/normal.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #include "common.h" 13 | #include "normal.h" 14 | 15 | #define NORMAL_THRESHOLD 0.1f 16 | 17 | //------------------------------------------------------------------------ 18 | // Perturb shading normal by tangent frame 19 | 20 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl) 21 | { 22 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 23 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 24 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 25 | return safeNormalize(_shading_nrm); 26 | } 27 | 28 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl) 29 | { 30 | //////////////////////////////////////////////////////////////////////// 31 | // FWD 32 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 33 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 34 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 35 | 36 | //////////////////////////////////////////////////////////////////////// 37 | // BWD 38 | vec3f d_shading_nrm(0); 39 | bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out); 40 | 41 | vec3f d_smooth_bitng(0); 42 | 43 | if (perturbed_nrm.z > 0.0f) 44 | { 45 | d_smooth_nrm += d_shading_nrm * perturbed_nrm.z; 46 | d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm); 47 | } 48 | 49 | d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y; 50 | d_perturbed_nrm.y += (opengl ? 
-1 : 1) * sum(d_shading_nrm * smooth_bitng); 51 | 52 | d_smooth_tng += d_shading_nrm * perturbed_nrm.x; 53 | d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng); 54 | 55 | vec3f d__smooth_bitng(0); 56 | bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng); 57 | 58 | bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng); 59 | } 60 | 61 | //------------------------------------------------------------------------ 62 | #define bent_nrm_eps 0.001f 63 | 64 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm) 65 | { 66 | float dp = dot(view_vec, smooth_nrm); 67 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 68 | return geom_nrm * (1.0f - t) + smooth_nrm * t; 69 | } 70 | 71 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out) 72 | { 73 | //////////////////////////////////////////////////////////////////////// 74 | // FWD 75 | float dp = dot(view_vec, smooth_nrm); 76 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 77 | 78 | //////////////////////////////////////////////////////////////////////// 79 | // BWD 80 | if (dp > NORMAL_THRESHOLD) 81 | d_smooth_nrm += d_out; 82 | else 83 | { 84 | // geom_nrm * (1.0f - t) + smooth_nrm * t; 85 | d_geom_nrm += d_out * (1.0f - t); 86 | d_smooth_nrm += d_out * t; 87 | float d_t = sum(d_out * (smooth_nrm - geom_nrm)); 88 | 89 | float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD; 90 | 91 | bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp); 92 | } 93 | } 94 | 95 | //------------------------------------------------------------------------ 96 | // Kernels 97 | 98 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p) 99 | { 100 | // Calculate pixel position. 101 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 102 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 103 | unsigned int pz = blockIdx.z; 104 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 105 | return; 106 | 107 | vec3f pos = p.pos.fetch3(px, py, pz); 108 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 109 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 110 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 111 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 112 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 113 | 114 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 115 | vec3f smooth_tng = safeNormalize(_smooth_tng); 116 | vec3f view_vec = safeNormalize(view_pos - pos); 117 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 118 | 119 | vec3f res; 120 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 121 | res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm); 122 | else 123 | res = fwdBendNormal(view_vec, shading_nrm, geom_nrm); 124 | 125 | p.out.store(px, py, pz, res); 126 | } 127 | 128 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p) 129 | { 130 | // Calculate pixel position. 
131 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 132 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 133 | unsigned int pz = blockIdx.z; 134 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 135 | return; 136 | 137 | vec3f pos = p.pos.fetch3(px, py, pz); 138 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 139 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 140 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 141 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 142 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 143 | vec3f d_out = p.out.fetch3(px, py, pz); 144 | 145 | /////////////////////////////////////////////////////////////////////////////////////////////////// 146 | // FWD 147 | 148 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 149 | vec3f smooth_tng = safeNormalize(_smooth_tng); 150 | vec3f _view_vec = view_pos - pos; 151 | vec3f view_vec = safeNormalize(view_pos - pos); 152 | 153 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 154 | 155 | /////////////////////////////////////////////////////////////////////////////////////////////////// 156 | // BWD 157 | 158 | vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0); 159 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 160 | { 161 | bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 162 | d_shading_nrm = -d_shading_nrm; 163 | d_geom_nrm = -d_geom_nrm; 164 | } 165 | else 166 | bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 167 | 168 | vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0); 169 | bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl); 170 | 171 | vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0); 172 | bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec); 173 | bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm); 174 | bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng); 175 | 176 | p.pos.store_grad(px, py, pz, -d__view_vec); 177 | p.view_pos.store_grad(px, py, pz, d__view_vec); 178 | p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm); 179 | p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm); 180 | p.smooth_tng.store_grad(px, py, pz, d__smooth_tng); 181 | p.geom_nrm.store_grad(px, py, pz, d_geom_nrm); 182 | } -------------------------------------------------------------------------------- /render/renderutils/c_src/normal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct PrepareShadingNormalKernelParams 17 | { 18 | Tensor pos; 19 | Tensor view_pos; 20 | Tensor perturbed_nrm; 21 | Tensor smooth_nrm; 22 | Tensor smooth_tng; 23 | Tensor geom_nrm; 24 | Tensor out; 25 | dim3 gridSize; 26 | bool two_sided_shading, opengl; 27 | }; 28 | -------------------------------------------------------------------------------- /render/renderutils/c_src/tensor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | #if defined(__CUDACC__) && defined(BFLOAT16) 14 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits 15 | #endif 16 | 17 | //--------------------------------------------------------------------------------- 18 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16 19 | 20 | struct Tensor 21 | { 22 | void* val; 23 | void* d_val; 24 | int dims[4], _dims[4]; 25 | int strides[4]; 26 | bool fp16; 27 | 28 | #if defined(__CUDA__) && !defined(__CUDA_ARCH__) 29 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {} 30 | #endif 31 | 32 | #ifdef __CUDACC__ 33 | // Helpers to index and read/write a single element 34 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; } 35 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); } 36 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; } 37 | #ifdef BFLOAT16 38 | __device__ inline float fetch(unsigned int idx) const { return fp16 ?
__bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; } 39 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; } 40 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; } 41 | #else 42 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; } 43 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; } 44 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; } 45 | #endif 46 | 47 | ////////////////////////////////////////////////////////////////////////////////////////// 48 | // Fetch, use broadcasting for tensor dimensions of size 1 49 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const 50 | { 51 | return fetch(nhwcIndex(z, y, x, 0)); 52 | } 53 | 54 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const 55 | { 56 | return vec3f( 57 | fetch(nhwcIndex(z, y, x, 0)), 58 | fetch(nhwcIndex(z, y, x, 1)), 59 | fetch(nhwcIndex(z, y, x, 2)) 60 | ); 61 | } 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 64 | // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside 65 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val) 66 | { 67 | store(_nhwcIndex(z, y, x, 0), _val); 68 | } 69 | 70 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 71 | { 72 | store(_nhwcIndex(z, y, x, 0), _val.x); 73 | store(_nhwcIndex(z, y, x, 1), _val.y); 74 | store(_nhwcIndex(z, y, x, 2), _val.z); 75 | } 76 | 77 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 78 | // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside 79 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val) 80 | { 81 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val); 82 | } 83 | 84 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 85 | { 86 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x); 87 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y); 88 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z); 89 | } 90 | #endif 91 | 92 | }; 93 | -------------------------------------------------------------------------------- /render/renderutils/c_src/vec3f.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | struct vec3f 15 | { 16 | float x, y, z; 17 | 18 | #ifdef __CUDACC__ 19 | __device__ vec3f() { } 20 | __device__ vec3f(float v) { x = v; y = v; z = v; } 21 | __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 22 | __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; } 23 | 24 | __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; } 25 | __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; } 26 | __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; } 27 | __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; } 28 | #endif 29 | }; 30 | 31 | #ifdef __CUDACC__ 32 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } 33 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } 34 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } 35 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); } 36 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); } 37 | 38 | __device__ static inline float sum(vec3f a) 39 | { 40 | return a.x + a.y + a.z; 41 | } 42 | 43 | __device__ static inline vec3f cross(vec3f a, vec3f b) 44 | { 45 | vec3f out; 46 | out.x = a.y * b.z - a.z * b.y; 47 | out.y = a.z * b.x - a.x * b.z; 48 | out.z = a.x * b.y - a.y * b.x; 49 | return out; 50 | } 51 | 52 | __device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out) 53 | { 54 | d_a.x += d_out.z * b.y - d_out.y * b.z; 55 | d_a.y += d_out.x * b.z - d_out.z * b.x; 56 | d_a.z += d_out.y * b.x - d_out.x * b.y; 57 | 58 | d_b.x += d_out.y * a.z - d_out.z * a.y; 59 | d_b.y += d_out.z * a.x - d_out.x * a.z; 60 | d_b.z += d_out.x * a.y - d_out.y * a.x; 61 | } 62 | 63 | __device__ static inline float dot(vec3f a, vec3f b) 64 | { 65 | return a.x * b.x + a.y * b.y + a.z * b.z; 66 | } 67 | 68 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out) 69 | { 70 | d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z; 71 | d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z; 72 | } 73 | 74 | __device__ static inline vec3f reflect(vec3f x, vec3f n) 75 | { 76 | return n * 2.0f * dot(n, x) - x; 77 | } 78 | 79 | __device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out) 80 | { 81 | d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z); 82 | d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z); 83 | d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1); 84 | 85 | d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x); 86 | d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y); 87 | d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z)); 88 | } 89 | 90 | __device__ static inline vec3f safeNormalize(vec3f v) 91 | { 92 | float l = sqrtf(v.x * v.x + 
v.y * v.y + v.z * v.z); 93 | return l > 0.0f ? (v / l) : vec3f(0.0f); 94 | } 95 | 96 | __device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out) 97 | { 98 | 99 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 100 | if (l > 0.0f) 101 | { 102 | float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f); 103 | d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac; 104 | d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac; 105 | d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac; 106 | } 107 | } 108 | 109 | #endif -------------------------------------------------------------------------------- /render/renderutils/c_src/vec4f.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | struct vec4f 15 | { 16 | float x, y, z, w; 17 | 18 | #ifdef __CUDACC__ 19 | __device__ vec4f() { } 20 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; } 21 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } 22 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; } 23 | #endif 24 | }; 25 | 26 | -------------------------------------------------------------------------------- /render/renderutils/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
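#
# These are the image losses dispatched by ru.image_loss(): an optional
# exposure + log + sRGB tonemap for HDR inputs, followed by L1/MSE or the
# outlier-robust SMAPE/RELMSE ratio losses. A hedged usage sketch, with
# images shaped [minibatch, H, W, 3] as in tests/test_loss.py:
#
#   loss = image_loss_fn(img, target, loss='smape', tonemapper='none')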
9 | 10 | import torch 11 | 12 | #---------------------------------------------------------------------------- 13 | # HDR image losses 14 | #---------------------------------------------------------------------------- 15 | 16 | def _tonemap_srgb(f, exposure=5): 17 | f = f * exposure 18 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 19 | 20 | def _SMAPE(img, target, eps=0.01): 21 | nom = torch.abs(img - target) 22 | denom = torch.abs(img) + torch.abs(target) + eps 23 | return torch.mean(nom / denom) 24 | 25 | def _RELMSE(img, target, eps=0.1): 26 | nom = (img - target) * (img - target) 27 | denom = img * img + target * target + eps 28 | return torch.mean(nom / denom) 29 | 30 | def image_loss_fn(img, target, loss, tonemapper): 31 | if tonemapper == 'log_srgb': 32 | img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1)) 33 | target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1)) 34 | 35 | if loss == 'mse': 36 | return torch.nn.functional.mse_loss(img, target) 37 | elif loss == 'smape': 38 | return _SMAPE(img, target) 39 | elif loss == 'relmse': 40 | return _RELMSE(img, target) 41 | else: 42 | return torch.nn.functional.l1_loss(img, target) 43 | -------------------------------------------------------------------------------- /render/renderutils/tests/test_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited.
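#
# Each case below runs the fused CUDA loss and the pure-PyTorch reference
# (use_python=True) on identical random inputs, then prints the maximum
# relative error of the loss value and of both input gradients. For fp32
# inputs these are expected, though not asserted here, to agree to roughly
# 1e-6.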
9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 8 18 | DTYPE = torch.float32 19 | 20 | def tonemap_srgb(f): 21 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 22 | 23 | def l1(output, target): 24 | x = torch.clamp(output, min=0, max=65535) 25 | r = torch.clamp(target, min=0, max=65535) 26 | x = tonemap_srgb(torch.log(x + 1)) 27 | r = tonemap_srgb(torch.log(r + 1)) 28 | return torch.nn.functional.l1_loss(x,r) 29 | 30 | def relative_loss(name, ref, cuda): 31 | ref = ref.float() 32 | cuda = cuda.float() 33 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 34 | 35 | def test_loss(loss, tonemapper): 36 | img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | img_ref = img_cuda.clone().detach().requires_grad_(True) 38 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 39 | target_ref = target_cuda.clone().detach().requires_grad_(True) 40 | 41 | ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True) 42 | ref_loss.backward() 43 | 44 | cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper) 45 | cuda_loss.backward() 46 | 47 | print("-------------------------------------------------------------") 48 | print(" Loss: %s, %s" % (loss, tonemapper)) 49 | print("-------------------------------------------------------------") 50 | 51 | relative_loss("res:", ref_loss, cuda_loss) 52 | relative_loss("img:", img_ref.grad, img_cuda.grad) 53 | relative_loss("target:", target_ref.grad, target_cuda.grad) 54 | 55 | 56 | test_loss('l1', 'none') 57 | test_loss('l1', 'log_srgb') 58 | test_loss('mse', 'log_srgb') 59 | test_loss('smape', 'none') 60 | test_loss('relmse', 'none') 61 | test_loss('mse', 'none') -------------------------------------------------------------------------------- /render/renderutils/tests/test_mesh.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | BATCH = 8 18 | RES = 1024 19 | DTYPE = torch.float32 20 | 21 | torch.manual_seed(0) 22 | 23 | def tonemap_srgb(f): 24 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 25 | 26 | def l1(output, target): 27 | x = torch.clamp(output, min=0, max=65535) 28 | r = torch.clamp(target, min=0, max=65535) 29 | x = tonemap_srgb(torch.log(x + 1)) 30 | r = tonemap_srgb(torch.log(r + 1)) 31 | return torch.nn.functional.l1_loss(x,r) 32 | 33 | def relative_loss(name, ref, cuda): 34 | ref = ref.float() 35 | cuda = cuda.float() 36 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref)).item()) 37 | 38 | def test_xfm_points(): 39 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 40 | points_ref = points_cuda.clone().detach().requires_grad_(True) 41 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 42 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 43 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 44 | 45 | ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True) 46 | ref_loss = torch.nn.MSELoss()(ref_out, target) 47 | ref_loss.backward() 48 | 49 | cuda_out = ru.xfm_points(points_cuda, mtx_cuda) 50 | cuda_loss = torch.nn.MSELoss()(cuda_out, target) 51 | cuda_loss.backward() 52 | 53 | print("-------------------------------------------------------------") 54 | 55 | relative_loss("res:", ref_out, cuda_out) 56 | relative_loss("points:", points_ref.grad, points_cuda.grad) 57 | 58 | def test_xfm_vectors(): 59 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 60 | points_ref = points_cuda.clone().detach().requires_grad_(True) 61 | points_cuda_p = points_cuda.clone().detach().requires_grad_(True) 62 | points_ref_p = points_cuda.clone().detach().requires_grad_(True) 63 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 64 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 65 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 66 | 67 | ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True) 68 | ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3]) 69 | ref_loss.backward() 70 | 71 | cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda) 72 | cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3]) 73 | cuda_loss.backward() 74 | 75 | ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True) 76 | ref_loss_p = torch.nn.MSELoss()(ref_out_p, target) 77 | ref_loss_p.backward() 78 | 79 | cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda) 80 | cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target) 81 | cuda_loss_p.backward() 82 | 83 | print("-------------------------------------------------------------") 84 | 85 | relative_loss("res:", ref_out, cuda_out) 86 | relative_loss("points:", points_ref.grad, points_cuda.grad) 87 | relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad) 88 | 89 | test_xfm_points() 90 | test_xfm_vectors() 91 | -------------------------------------------------------------------------------- /render/renderutils/tests/test_perf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | DTYPE=torch.float32 18 | 19 | def test_bsdf(BATCH, RES, ITR): 20 | kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 21 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 22 | arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 23 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 24 | pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 25 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 26 | nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 27 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 28 | view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 29 | view_ref = view_cuda.clone().detach().requires_grad_(True) 30 | light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 31 | light_ref = light_cuda.clone().detach().requires_grad_(True) 32 | target = torch.rand(BATCH, RES, RES, 3, device='cuda') 33 | 34 | start = torch.cuda.Event(enable_timing=True) 35 | end = torch.cuda.Event(enable_timing=True) 36 | 37 | ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 38 | 39 | print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES)) 40 | 41 | start.record() 42 | for i in range(ITR): 43 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 44 | end.record() 45 | torch.cuda.synchronize() 46 | print("Pbr BSDF python:", start.elapsed_time(end)) 47 | 48 | start.record() 49 | for i in range(ITR): 50 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 51 | end.record() 52 | torch.cuda.synchronize() 53 | print("Pbr BSDF cuda:", start.elapsed_time(end)) 54 | 55 | test_bsdf(1, 512, 1000) 56 | test_bsdf(16, 512, 1000) 57 | test_bsdf(1, 2048, 1000) 58 | -------------------------------------------------------------------------------- /script/__pycache__/connet_face_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/connet_face_head.cpython-38.pyc -------------------------------------------------------------------------------- /script/__pycache__/get_tet_smpl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/get_tet_smpl.cpython-38.pyc -------------------------------------------------------------------------------- /script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc -------------------------------------------------------------------------------- /script/get_tet_smpl.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import pyvista as pv 4 | import pytetwild 5 | 6 | import numpy as np 7 | import tetgen 8 | 9 | def get_tet_mesh(mesh_path, save_npz_path): 10 | 11 | surface_mesh = pv.read(mesh_path) 12 | tet = tetgen.TetGen(surface_mesh) 13 | tet.make_manifold(verbose=True) 14 | tet_grid_volume=6e-3 15 | vertices, indices = tet.tetrahedralize( fixedvolume=1, 16 | maxvolume=tet_grid_volume, 17 | regionattrib=1, 18 | nobisect=False, steinerleft=-1, order=1, metric=1, meditview=1, nonodewritten=0, verbose=2) 19 | # shell = tet.grid.extract_surface() 20 | # tet_path = "tet_smpl2.obj" 21 | # vertices = vertices.to(np.float64) 22 | vertices = vertices.astype(np.float32) 23 | tet_path = save_npz_path.replace("npz", "obj") 24 | save_tet_mesh_as_obj(vertices, indices, tet_path) 25 | np.savez(save_npz_path, v=vertices, f=indices) 26 | 27 | return vertices, indices 28 | 29 | 30 | def get_tet_mesh_test(mesh_path, save_npz_path): 31 | 32 | surface_mesh = pv.read(mesh_path) 33 | tetrahedral_mesh = pytetwild.tetrahedralize_pv(surface_mesh, edge_length_fac=0.1) 34 | tetrahedral_mesh.explode(1).plot(show_edges=True) 35 | 36 | v = tetrahedral_mesh.points 37 | f = tetrahedral_mesh.cells.reshape(-1, 5)[:, 1:] 38 | 39 | np.savez(save_npz_path, v=v, f=f) 40 | 41 | return v, f 42 | 43 | 44 | def save_tet_mesh_as_obj(vertices, tetrahedra, filename): 45 | with open(filename, 'w') as f: 46 | for vertex in vertices: 47 | f.write(f"v {vertex[0]} {vertex[1]} {vertex[2]}\n") 48 | 49 | if tetrahedra is not None: 50 | for tet in tetrahedra: 51 | f.write(f"f {tet[0]+1} {tet[1]+1} {tet[2]+1} {tet[3]+1}\n") 52 | 53 | -------------------------------------------------------------------------------- /ssim_loss.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | from math import exp 16 | 17 | def l1_loss(network_output, gt): 18 | return torch.abs((network_output - gt)).mean() 19 | 20 | def l2_loss(network_output, gt): 21 | return ((network_output - gt) ** 2).mean() 22 | 23 | def gaussian(window_size, sigma): 24 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 25 | return gauss / gauss.sum() 26 | 27 | def create_window(window_size, channel): 28 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 29 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 30 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 31 | return window 32 | 33 | def ssim(img1, img2, window_size=11, size_average=True): 34 | channel = img1.size(-3) 35 | window = create_window(window_size, channel) 36 | 37 | if img1.is_cuda: 38 | window = window.cuda(img1.get_device()) 39 | window = window.type_as(img1) 40 | 41 | return _ssim(img1, img2, window, window_size, channel, size_average) 42 | 43 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 44 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 45 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 46 | 47 | mu1_sq = mu1.pow(2) 48 | mu2_sq = mu2.pow(2) 49 | mu1_mu2 = mu1 * mu2 50 | 51 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 52 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 53 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 54 | 55 | C1 = 0.01 ** 2 56 | C2 = 0.03 ** 2 57 | 58 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 59 | 60 | if size_average: 61 | return ssim_map.mean() 62 | else: 63 | return ssim_map.mean(1).mean(1).mean(1) 64 | 65 | -------------------------------------------------------------------------------- /third_parties/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__init__.py -------------------------------------------------------------------------------- /third_parties/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/lpips/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import torch 8 | # from torch.autograd import Variable 9 | 10 | from lpips.trainer import * 11 | from lpips.lpips import * 12 | 13 | def normalize_tensor(in_feat,eps=1e-10): 14 | norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True)) 15 | return in_feat/(norm_factor+eps) 16 | 17 | def l2(p0, p1, range=255.): 18 | return .5*np.mean((p0 / range - p1 / range)**2) 19 | 20 | def psnr(p0, p1, peak=255.): 21 | return 
10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2)) 22 | 23 | def dssim(p0, p1, range=255.): 24 | from skimage.metrics import structural_similarity 25 | return (1 - structural_similarity(p0, p1, data_range=range, channel_axis=-1)) / 2.  # replaces the removed skimage.measure.compare_ssim API (channel_axis needs skimage >= 0.19) 26 | 27 | def rgb2lab(in_img,mean_cent=False): 28 | from skimage import color 29 | img_lab = color.rgb2lab(in_img) 30 | if(mean_cent): 31 | img_lab[:,:,0] = img_lab[:,:,0]-50 32 | return img_lab 33 | 34 | def tensor2np(tensor_obj): 35 | # change dimension of a tensor object into a numpy array 36 | return tensor_obj[0].cpu().float().numpy().transpose((1,2,0)) 37 | 38 | def np2tensor(np_obj): 39 | # change dimenion of np array into tensor array 40 | return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 41 | 42 | def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False): 43 | # image tensor to lab tensor 44 | from skimage import color 45 | 46 | img = tensor2im(image_tensor) 47 | img_lab = color.rgb2lab(img) 48 | if(mc_only): 49 | img_lab[:,:,0] = img_lab[:,:,0]-50 50 | if(to_norm and not mc_only): 51 | img_lab[:,:,0] = img_lab[:,:,0]-50 52 | img_lab = img_lab/100. 53 | 54 | return np2tensor(img_lab) 55 | 56 | def tensorlab2tensor(lab_tensor,return_inbnd=False): 57 | from skimage import color 58 | import warnings 59 | warnings.filterwarnings("ignore") 60 | 61 | lab = tensor2np(lab_tensor)*100. 62 | lab[:,:,0] = lab[:,:,0]+50 63 | 64 | rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1) 65 | if(return_inbnd): 66 | # convert back to lab, see if we match 67 | lab_back = color.rgb2lab(rgb_back.astype('uint8')) 68 | mask = 1.*np.isclose(lab_back,lab,atol=2.) 69 | mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis]) 70 | return (im2tensor(rgb_back),mask) 71 | else: 72 | return im2tensor(rgb_back) 73 | 74 | def load_image(path): 75 | if(path[-3:] == 'dng'): 76 | import rawpy 77 | with rawpy.imread(path) as raw: 78 | img = raw.postprocess() 79 | elif(path[-3:]=='bmp' or path[-3:]=='jpg' or path[-3:]=='png' or path[-4:]=='jpeg'): 80 | import cv2 81 | return cv2.imread(path)[:,:,::-1] 82 | else: 83 | import matplotlib.pyplot as plt; img = (255*plt.imread(path)[:,:,:3]).astype('uint8')  # plt was previously used without being imported 84 | 85 | return img 86 | 87 | def rgb2lab(input): 88 | from skimage import color 89 | return color.rgb2lab(input / 255.) 90 | 91 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): 92 | image_numpy = image_tensor[0].cpu().float().numpy() 93 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor 94 | return image_numpy.astype(imtype) 95 | 96 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): 97 | return torch.Tensor((image / factor - cent) 98 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 99 | 100 | def tensor2vec(vector_tensor): 101 | return vector_tensor.data.cpu().numpy()[:, :, 0, 0] 102 | 103 | 104 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): 105 | # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.): 106 | image_numpy = image_tensor[0].cpu().float().numpy() 107 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor 108 | return image_numpy.astype(imtype) 109 | 110 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): 111 | # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.): 112 | return torch.Tensor((image / factor - cent) 113 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 114 | 115 | 116 | 117 | def voc_ap(rec, prec, use_07_metric=False): 118 | """ ap = voc_ap(rec, prec, [use_07_metric]) 119 | Compute VOC AP given precision and recall. 120 | If use_07_metric is true, uses the 121 | VOC 07 11 point method (default:False). 122 | """ 123 | if use_07_metric: 124 | # 11 point metric 125 | ap = 0. 126 | for t in np.arange(0., 1.1, 0.1): 127 | if np.sum(rec >= t) == 0: 128 | p = 0 129 | else: 130 | p = np.max(prec[rec >= t]) 131 | ap = ap + p / 11. 132 | else: 133 | # correct AP calculation 134 | # first append sentinel values at the end 135 | mrec = np.concatenate(([0.], rec, [1.])) 136 | mpre = np.concatenate(([0.], prec, [0.])) 137 | 138 | # compute the precision envelope 139 | for i in range(mpre.size - 1, 0, -1): 140 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 141 | 142 | # to calculate area under PR curve, look for points 143 | # where X axis (recall) changes value 144 | i = np.where(mrec[1:] != mrec[:-1])[0] 145 | 146 | # and sum (\Delta recall) * prec 147 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 148 | return ap 149 | 150 |
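A quick numeric check of the two AP branches above, assuming the `voc_ap` from this listing is in scope (numbers hand-computed for this tiny curve):

```python
import numpy as np

# Tiny precision/recall curve: 4 ranked detections, 2 of them correct.
rec  = np.array([0.5, 0.5, 1.0, 1.0])
prec = np.array([1.0, 0.5, 0.66, 0.5])

# Envelope AP integrates the right-to-left precision envelope over recall steps:
# 0.5 * 1.0 + 0.5 * 0.66 = 0.83
print(voc_ap(rec, prec, use_07_metric=False))   # 0.83

# 11-point AP averages max precision at recall thresholds 0.0, 0.1, ..., 1.0:
# (6 * 1.0 + 5 * 0.66) / 11 ~= 0.845
print(voc_ap(rec, prec, use_07_metric=True))
```

The two variants disagree slightly by construction; the envelope form is the one used for "correct AP calculation", as the comment in the source notes.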
-------------------------------------------------------------------------------- /third_parties/lpips/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/lpips/pretrained_networks.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import torch 3 | from torchvision import models as tv 4 | 5 | class squeezenet(torch.nn.Module): 6 | def __init__(self, requires_grad=False, pretrained=True): 7 | super(squeezenet, self).__init__() 8 | pretrained_features = tv.squeezenet1_1(weights=tv.SqueezeNet1_1_Weights.DEFAULT if pretrained else None).features  # weights API, matching alexnet/vgg16 below 9 | self.slice1 = torch.nn.Sequential() 10 | self.slice2 = torch.nn.Sequential() 11 | self.slice3 = torch.nn.Sequential() 12 | self.slice4 = torch.nn.Sequential() 13 | self.slice5 = torch.nn.Sequential() 14 | self.slice6 = torch.nn.Sequential() 15 | self.slice7 = torch.nn.Sequential() 16 | self.N_slices = 7 17 | for x in range(2): 18 | self.slice1.add_module(str(x), pretrained_features[x]) 19 | for x in range(2,5): 20 | self.slice2.add_module(str(x), pretrained_features[x]) 21 | for x in range(5, 8): 22 | self.slice3.add_module(str(x), pretrained_features[x]) 23 | for x in range(8, 10): 24 | self.slice4.add_module(str(x), pretrained_features[x]) 25 | for x in range(10, 11): 26 | self.slice5.add_module(str(x), pretrained_features[x]) 27 | for x in range(11, 12): 28 | self.slice6.add_module(str(x), pretrained_features[x]) 29 | for x in range(12, 13): 30 | self.slice7.add_module(str(x), pretrained_features[x]) 31 | if not requires_grad: 32 | for param in self.parameters(): 33 | param.requires_grad = False 34 | 35 | def forward(self, X): 36 | h = self.slice1(X) 37 | h_relu1 = h 38 | h = self.slice2(h) 39 | h_relu2 = h 40 | h = self.slice3(h) 41 | h_relu3 = h 42 | h = self.slice4(h) 43 | h_relu4 = h 44 | h = self.slice5(h) 45 | h_relu5 = h 46 | h = self.slice6(h) 47 | h_relu6 = h 48 | h = self.slice7(h) 49 | h_relu7 = h 50 | vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7']) 51 | out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7) 52 | 53 | return out 54 | 55 |
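The slice wrappers above exist so LPIPS can read activations at several depths in a single forward pass. A minimal shape check (a sketch only; the import path assumes this repository's layout and random weights suffice):

```python
import torch
from third_parties.lpips.pretrained_networks import squeezenet  # path assumed from this repo

net = squeezenet(pretrained=False).eval()   # random weights are fine for a shape check
x = torch.randn(1, 3, 64, 64)
outs = net(x)                               # namedtuple with N_slices = 7 activations
for name, h in zip(outs._fields, outs):
    print(name, tuple(h.shape))             # spatial size shrinks slice by slice
```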
56 | class alexnet(torch.nn.Module): 57 | def __init__(self, requires_grad=False, pretrained=True): 58 | super(alexnet, self).__init__() 59 | weights = tv.AlexNet_Weights.DEFAULT if pretrained else None 60 | alexnet_pretrained_features = tv.alexnet(weights=weights).features 61 | self.slice1 = torch.nn.Sequential() 62 | self.slice2 = torch.nn.Sequential() 63 | self.slice3 = torch.nn.Sequential() 64 | self.slice4 = torch.nn.Sequential() 65 | self.slice5 = torch.nn.Sequential() 66 | self.N_slices = 5 67 | for x in range(2): 68 | self.slice1.add_module(str(x), alexnet_pretrained_features[x]) 69 | for x in range(2, 5): 70 | self.slice2.add_module(str(x), alexnet_pretrained_features[x]) 71 | for x in range(5, 8): 72 | self.slice3.add_module(str(x), alexnet_pretrained_features[x]) 73 | for x in range(8, 10): 74 | self.slice4.add_module(str(x), alexnet_pretrained_features[x]) 75 | for x in range(10, 12): 76 | self.slice5.add_module(str(x), alexnet_pretrained_features[x]) 77 | if not requires_grad: 78 | for param in self.parameters(): 79 | param.requires_grad = False 80 | 81 | def forward(self, X): 82 | h = self.slice1(X) 83 | h_relu1 = h 84 | h = self.slice2(h) 85 | h_relu2 = h 86 | h = self.slice3(h) 87 | h_relu3 = h 88 | h = self.slice4(h) 89 | h_relu4 = h 90 | h = self.slice5(h) 91 | h_relu5 = h 92 | alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5']) 93 | out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5) 94 | 95 | return out 96 | 97 | class vgg16(torch.nn.Module): 98 | def __init__(self, requires_grad=False, pretrained=True): 99 | super(vgg16, self).__init__() 100 | weights = tv.VGG16_Weights.DEFAULT if pretrained else None 101 | vgg_pretrained_features = tv.vgg16(weights=weights).features 102 | self.slice1 = torch.nn.Sequential() 103 | self.slice2 = torch.nn.Sequential() 104 | self.slice3 = torch.nn.Sequential() 105 | self.slice4 = torch.nn.Sequential() 106 | self.slice5 = torch.nn.Sequential() 107 | self.N_slices = 5 108 | for x in range(4): 109 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 110 | for x in range(4, 9): 111 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 112 | for x in range(9, 16): 113 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 114 | for x in range(16, 23): 115 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 116 | for x in range(23, 30): 117 | self.slice5.add_module(str(x), vgg_pretrained_features[x]) 118 | if not requires_grad: 119 | for param in self.parameters(): 120 | param.requires_grad = False 121 | 122 | def forward(self, X): 123 | h = self.slice1(X) 124 | h_relu1_2 = h 125 | h = self.slice2(h) 126 | h_relu2_2 = h 127 | h = self.slice3(h) 128 | h_relu3_3 = h 129 | h = self.slice4(h) 130 | h_relu4_3 = h 131 | h = self.slice5(h) 132 | h_relu5_3 = h 133 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']) 134 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3) 135 | 136 | return out 137 | 138 | 139 | 140 | class resnet(torch.nn.Module): 141 | def __init__(self, requires_grad=False, pretrained=True, num=18): 142 | super(resnet, self).__init__() 143 | if(num==18): 144 | self.net = tv.resnet18(weights=tv.ResNet18_Weights.DEFAULT if pretrained else None) 145 | elif(num==34): 146 | self.net = tv.resnet34(weights=tv.ResNet34_Weights.DEFAULT if pretrained else None) 147 | elif(num==50): 148 | self.net = tv.resnet50(weights=tv.ResNet50_Weights.DEFAULT if pretrained else None) 149 | elif(num==101): 150 | self.net = tv.resnet101(weights=tv.ResNet101_Weights.DEFAULT if pretrained else None) 151 | elif(num==152): 152 | self.net = tv.resnet152(weights=tv.ResNet152_Weights.DEFAULT if pretrained else None) 153 | self.N_slices = 5 154 | 155 | self.conv1 = self.net.conv1 156 | self.bn1 = self.net.bn1 157 | self.relu = self.net.relu 158 | self.maxpool = self.net.maxpool 159
| self.layer1 = self.net.layer1 160 | self.layer2 = self.net.layer2 161 | self.layer3 = self.net.layer3 162 | self.layer4 = self.net.layer4 163 | 164 | def forward(self, X): 165 | h = self.conv1(X) 166 | h = self.bn1(h) 167 | h = self.relu(h) 168 | h_relu1 = h 169 | h = self.maxpool(h) 170 | h = self.layer1(h) 171 | h_conv2 = h 172 | h = self.layer2(h) 173 | h_conv3 = h 174 | h = self.layer3(h) 175 | h_conv4 = h 176 | h = self.layer4(h) 177 | h_conv5 = h 178 | 179 | outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5']) 180 | out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5) 181 | 182 | return out 183 | -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/alex.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/alex.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/squeeze.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/vgg.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/alex.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/alex.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/squeeze.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/vgg.pth -------------------------------------------------------------------------------- /third_parties/pytorch3d/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ops 2 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/knn.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | #include <torch/extension.h> 11 | #include <tuple> 12 | #include "utils/pytorch3d_cutils.h" 13 | #define WITH_CUDA true 14 | 15 | // Compute indices of K nearest neighbors in pointcloud p2 to points 16 | // in pointcloud p1. 17 | // 18 | // Args: 19 | //    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each 20 | //        containing P1 points of dimension D. 21 | //    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each 22 | //        containing P2 points of dimension D. 23 | //    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. 24 | //    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. 25 | //    norm: int specifying the norm for the distance (1 for L1, 2 for L2) 26 | //    K: int giving the number of nearest points to return. 27 | //    version: Integer telling which implementation to use. 28 | // 29 | // Returns: 30 | //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where 31 | //        p1_neighbor_idx[n, i, k] = j means that the kth nearest 32 | //        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. 33 | //        It is padded with zeros so that it can be used easily in a later 34 | //        gather() operation. 35 | // 36 | //    p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared 37 | //        distance from each point p1[n, p, :] to its K neighbors 38 | //        p2[n, p1_neighbor_idx[n, p, k], :]. 39 | 40 | // CPU implementation. 41 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu( 42 | const at::Tensor& p1, 43 | const at::Tensor& p2, 44 | const at::Tensor& lengths1, 45 | const at::Tensor& lengths2, 46 | const int norm, 47 | const int K); 48 | 49 | // CUDA implementation 50 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda( 51 | const at::Tensor& p1, 52 | const at::Tensor& p2, 53 | const at::Tensor& lengths1, 54 | const at::Tensor& lengths2, 55 | const int norm, 56 | const int K, 57 | const int version); 58 | 59 | // Implementation which is exposed. 60 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx( 61 | const at::Tensor& p1, 62 | const at::Tensor& p2, 63 | const at::Tensor& lengths1, 64 | const at::Tensor& lengths2, 65 | const int norm, 66 | const int K, 67 | const int version) { 68 | if (p1.is_cuda() || p2.is_cuda()) { 69 | #ifdef WITH_CUDA 70 | CHECK_CUDA(p1); 71 | CHECK_CUDA(p2); 72 | return KNearestNeighborIdxCuda( 73 | p1, p2, lengths1, lengths2, norm, K, version); 74 | #else 75 | AT_ERROR("Not compiled with GPU support."); 76 | #endif 77 | } 78 | return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K); 79 | } 80 | 81 | // Compute gradients with respect to p1 and p2 82 | // 83 | // Args: 84 | //    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each 85 | //        containing P1 points of dimension D. 86 | //    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each 87 | //        containing P2 points of dimension D. 88 | //    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. 89 | //    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. 90 | //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where 91 | //        p1_neighbor_idx[n, i, k] = j means that the kth nearest 92 | //        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. 93 | //        It is padded with zeros so that it can be used easily in a later 94 | //        gather() operation. This is computed from the forward pass. 95 | //    norm: int specifying the norm for the distance (1 for L1, 2 for L2) 96 | //    grad_dists: FloatTensor of shape (N, P1, K) which contains the input 97 | //        gradients. 98 | // 99 | // Returns: 100 | //    grad_p1: FloatTensor of shape (N, P1, D) containing the output gradients 101 | //        wrt p1. 102 | //    grad_p2: FloatTensor of shape (N, P2, D) containing the output gradients 103 | //        wrt p2. 104 | 105 | // CPU implementation. 106 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu( 107 | const at::Tensor& p1, 108 | const at::Tensor& p2, 109 | const at::Tensor& lengths1, 110 | const at::Tensor& lengths2, 111 | const at::Tensor& idxs, 112 | const int norm, 113 | const at::Tensor& grad_dists); 114 | 115 | // CUDA implementation 116 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda( 117 | const at::Tensor& p1, 118 | const at::Tensor& p2, 119 | const at::Tensor& lengths1, 120 | const at::Tensor& lengths2, 121 | const at::Tensor& idxs, 122 | const int norm, 123 | const at::Tensor& grad_dists); 124 | 125 | // Implementation which is exposed. 126 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward( 127 | const at::Tensor& p1, 128 | const at::Tensor& p2, 129 | const at::Tensor& lengths1, 130 | const at::Tensor& lengths2, 131 | const at::Tensor& idxs, 132 | const int norm, 133 | const at::Tensor& grad_dists) { 134 | if (p1.is_cuda() || p2.is_cuda()) { 135 | #ifdef WITH_CUDA 136 | CHECK_CUDA(p1); 137 | CHECK_CUDA(p2); 138 | return KNearestNeighborBackwardCuda( 139 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists); 140 | #else 141 | AT_ERROR("Not compiled with GPU support."); 142 | #endif 143 | } 144 | return KNearestNeighborBackwardCpu( 145 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists); 146 | } 147 | 148 | // Utility to check whether a KNN version can be used. 149 | // 150 | // Args: 151 | //    version: Integer in the range 0 <= version <= 3 indicating one of our 152 | //        KNN implementations. 153 | //    D: Number of dimensions for the input and query point clouds 154 | //    K: Number of neighbors to be found 155 | // 156 | // Returns: 157 | //    Whether the indicated KNN version can be used. 158 | bool KnnCheckVersion(int version, const int64_t D, const int64_t K); 159 | 160 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 161 | m.def("knn_points_idx", &KNearestNeighborIdx); 162 | m.def("knn_points_backward", &KNearestNeighborBackward); 163 | } 164 |
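The two bindings registered above are what `third_parties/pytorch3d/ops.py` drives in this repo. A sketch of calling them directly, assuming the extension has been compiled (the `load` build step and module name here are illustrative, not the repo's actual build path):

```python
import torch
from torch.utils.cpp_extension import load

# Hypothetical ad-hoc JIT build; requires a CUDA toolchain for knn.cu.
_C = load(name="knn_ext", sources=[
    "third_parties/pytorch3d/cuda/knn.cpp",
    "third_parties/pytorch3d/cuda/knn.cu",
    "third_parties/pytorch3d/cuda/knn_cpu.cpp",
])

p1 = torch.randn(2, 128, 3, device="cuda")   # (N, P1, D) query points
p2 = torch.randn(2, 256, 3, device="cuda")   # (N, P2, D) reference points
l1 = torch.full((2,), 128, dtype=torch.int64, device="cuda")
l2 = torch.full((2,), 256, dtype=torch.int64, device="cuda")

# norm=2 -> squared L2 distances; version=-1 lets the dispatcher pick a kernel.
idx, dists = _C.knn_points_idx(p1, p2, l1, l2, 2, 8, -1)
print(idx.shape, dists.shape)                # torch.Size([2, 128, 8]) for both
```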
-------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/knn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include <torch/extension.h> 10 | #include <queue> 11 | #include <tuple> 12 | 13 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu( 14 | const at::Tensor& p1, 15 | const at::Tensor& p2, 16 | const at::Tensor& lengths1, 17 | const at::Tensor& lengths2, 18 | const int norm, 19 | const int K) { 20 | const int N = p1.size(0); 21 | const int P1 = p1.size(1); 22 | const int D = p1.size(2); 23 | 24 | auto long_opts = lengths1.options().dtype(torch::kInt64); 25 | torch::Tensor idxs = torch::full({N, P1, K}, 0, long_opts); 26 | torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options()); 27 | 28 | auto p1_a = p1.accessor<float, 3>(); 29 | auto p2_a = p2.accessor<float, 3>(); 30 | auto lengths1_a = lengths1.accessor<int64_t, 1>(); 31 | auto lengths2_a = lengths2.accessor<int64_t, 1>(); 32 | auto idxs_a = idxs.accessor<int64_t, 3>(); 33 | auto dists_a = dists.accessor<float, 3>(); 34 | 35 | for (int n = 0; n < N; ++n) { 36 | const int64_t length1 = lengths1_a[n]; 37 | const int64_t length2 = lengths2_a[n]; 38 | for (int64_t i1 = 0; i1 < length1; ++i1) { 39 | // Use a priority queue to store (distance, index) tuples. 40 | std::priority_queue<std::tuple<float, int64_t>> q; 41 | for (int64_t i2 = 0; i2 < length2; ++i2) { 42 | float dist = 0; 43 | for (int d = 0; d < D; ++d) { 44 | float diff = p1_a[n][i1][d] - p2_a[n][i2][d]; 45 | if (norm == 1) { 46 | dist += abs(diff); 47 | } else { // norm is 2 (default) 48 | dist += diff * diff; 49 | } 50 | } 51 | int size = static_cast<int>(q.size()); 52 | if (size < K || dist < std::get<0>(q.top())) { 53 | q.emplace(dist, i2); 54 | if (size >= K) { 55 | q.pop(); 56 | } 57 | } 58 | } 59 | while (!q.empty()) { 60 | auto t = q.top(); 61 | q.pop(); 62 | const int k = q.size(); 63 | dists_a[n][i1][k] = std::get<0>(t); 64 | idxs_a[n][i1][k] = std::get<1>(t); 65 | } 66 | } 67 | } 68 | return std::make_tuple(idxs, dists); 69 | } 70 |
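The `priority_queue` in this forward loop is a max-heap keyed on distance: while fewer than K candidates are stored everything is pushed, and afterwards a new point only displaces the current worst. A pure-Python model of that inner loop (illustrative only, not part of the repo):

```python
import heapq

def knn_one_point(q, pts, K, norm=2):
    """Model of the inner loop of KNearestNeighborIdxCpu for one query point."""
    heap = []  # heapq is a min-heap, so store (-dist, -idx): the worst candidate sits on top
    for j, p in enumerate(pts):
        d = sum(abs(a - b) if norm == 1 else (a - b) ** 2 for a, b in zip(q, p))
        if len(heap) < K:
            heapq.heappush(heap, (-d, -j))
        elif d < -heap[0][0]:                 # strictly closer than the current worst
            heapq.heapreplace(heap, (-d, -j))
    out = sorted((-d, -j) for d, j in heap)   # ascending distance, like the pop loop
    return [j for _, j in out], [d for d, _ in out]

idx, dists = knn_one_point((0.0, 0.0), [(1, 0), (0, 2), (0.5, 0.5), (3, 3)], K=2)
print(idx, dists)   # [2, 0] [0.5, 1.0]
```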
71 | // ------------------------------------------------------------- // 72 | //                   Backward Operators                             // 73 | // ------------------------------------------------------------- // 74 | 75 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu( 76 | const at::Tensor& p1, 77 | const at::Tensor& p2, 78 | const at::Tensor& lengths1, 79 | const at::Tensor& lengths2, 80 | const at::Tensor& idxs, 81 | const int norm, 82 | const at::Tensor& grad_dists) { 83 | const int N = p1.size(0); 84 | const int P1 = p1.size(1); 85 | const int D = p1.size(2); 86 | const int P2 = p2.size(1); 87 | const int K = idxs.size(2); 88 | 89 | torch::Tensor grad_p1 = torch::full({N, P1, D}, 0, p1.options()); 90 | torch::Tensor grad_p2 = torch::full({N, P2, D}, 0, p2.options()); 91 | 92 | auto p1_a = p1.accessor<float, 3>(); 93 | auto p2_a = p2.accessor<float, 3>(); 94 | auto lengths1_a = lengths1.accessor<int64_t, 1>(); 95 | auto lengths2_a = lengths2.accessor<int64_t, 1>(); 96 | auto idxs_a = idxs.accessor<int64_t, 3>(); 97 | auto grad_dists_a = grad_dists.accessor<float, 3>(); 98 | auto grad_p1_a = grad_p1.accessor<float, 3>(); 99 | auto grad_p2_a = grad_p2.accessor<float, 3>(); 100 | 101 | for (int n = 0; n < N; ++n) { 102 | const int64_t length1 = lengths1_a[n]; 103 | int64_t length2 = lengths2_a[n]; 104 | length2 = (length2 < K) ? length2 : K; 105 | for (int64_t i1 = 0; i1 < length1; ++i1) { 106 | for (int64_t k = 0; k < length2; ++k) { 107 | const int64_t i2 = idxs_a[n][i1][k]; 108 | // If the index is the pad value of -1 then ignore it 109 | if (i2 == -1) { 110 | continue; 111 | } 112 | for (int64_t d = 0; d < D; ++d) { 113 | float diff = 0.0; 114 | if (norm == 1) { 115 | float sign = (p1_a[n][i1][d] > p2_a[n][i2][d]) ? 1.0 : -1.0; 116 | diff = grad_dists_a[n][i1][k] * sign; 117 | } else { // norm is 2 (default) 118 | diff = 2.0f * grad_dists_a[n][i1][k] * 119 | (p1_a[n][i1][d] - p2_a[n][i2][d]); 120 | } 121 | grad_p1_a[n][i1][d] += diff; 122 | grad_p2_a[n][i2][d] += -1.0f * diff; 123 | } 124 | } 125 | } 126 | } 127 | return std::make_tuple(grad_p1, grad_p2); 128 | } 129 |
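The backward pass hand-codes the derivative of the squared distance: for the L2 branch it accumulates 2 * grad_dist * (p1 - p2) into grad_p1 and its negation into grad_p2, which is exactly d/dp ||p1 - p2||^2. A two-line autograd cross-check of that formula:

```python
import torch

p1 = torch.randn(3, requires_grad=True)
p2 = torch.randn(3, requires_grad=True)
((p1 - p2) ** 2).sum().backward()
diff = (p1 - p2).detach()
print(torch.allclose(p1.grad, 2 * diff), torch.allclose(p2.grad, -2 * diff))  # True True
```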
-------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/index_utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | // This converts dynamic array lookups into static array lookups, for small 10 | // arrays up to size 32. 11 | // 12 | // Suppose we have a small thread-local array: 13 | // 14 | //    float vals[10]; 15 | // 16 | // Ideally we should only index this array using static indices: 17 | // 18 | //    for (int i = 0; i < 10; ++i) vals[i] = i * i; 19 | // 20 | // If we do so, then the CUDA compiler may be able to place the array into 21 | // registers, which can have a big performance improvement. However if we 22 | // access the array dynamically, the compiler may force the array into 23 | // local memory, which has the same latency as global memory. 24 | // 25 | // These functions convert dynamic array access into static array access 26 | // using a brute-force lookup table. It can be used like this: 27 | // 28 | //    float vals[10]; 29 | //    int idx = 3; 30 | //    float val = 3.14f; 31 | //    RegisterIndexUtils<float, 10>::set(vals, idx, val); 32 | //    float val2 = RegisterIndexUtils<float, 10>::get(vals, idx); 33 | // 34 | // The implementation is based on fbcuda/RegisterUtils.cuh: 35 | // https://github.com/facebook/fbcuda/blob/master/RegisterUtils.cuh 36 | // To avoid depending on the entire library, we just reimplement these two 37 | // functions. The fbcuda implementation is a bit more sophisticated, and uses 38 | // the preprocessor to generate switch statements that go up to N for each 39 | // value of N. We are lazy and just have a giant explicit switch statement. 40 | // 41 | // We might be able to use a template metaprogramming approach similar to 42 | // DispatchKernel1D for this. However DispatchKernel1D is intended to be used 43 | // for dispatching to the correct CUDA kernel on the host, while this is 44 | // is intended to run on the device. I was concerned that a metaprogramming 45 | // approach for this might lead to extra function calls at runtime if the 46 | // compiler fails to optimize them away, which could be very slow on device. 47 | // However I didn't actually benchmark or test this.
48 | template <typename T, int N> 49 | struct RegisterIndexUtils { 50 | __device__ __forceinline__ static T get(const T arr[N], int idx) { 51 | if (idx < 0 || idx >= N) 52 | return T(); 53 | switch (idx) { 54 | case 0: 55 | return arr[0]; 56 | case 1: 57 | return arr[1]; 58 | case 2: 59 | return arr[2]; 60 | case 3: 61 | return arr[3]; 62 | case 4: 63 | return arr[4]; 64 | case 5: 65 | return arr[5]; 66 | case 6: 67 | return arr[6]; 68 | case 7: 69 | return arr[7]; 70 | case 8: 71 | return arr[8]; 72 | case 9: 73 | return arr[9]; 74 | case 10: 75 | return arr[10]; 76 | case 11: 77 | return arr[11]; 78 | case 12: 79 | return arr[12]; 80 | case 13: 81 | return arr[13]; 82 | case 14: 83 | return arr[14]; 84 | case 15: 85 | return arr[15]; 86 | case 16: 87 | return arr[16]; 88 | case 17: 89 | return arr[17]; 90 | case 18: 91 | return arr[18]; 92 | case 19: 93 | return arr[19]; 94 | case 20: 95 | return arr[20]; 96 | case 21: 97 | return arr[21]; 98 | case 22: 99 | return arr[22]; 100 | case 23: 101 | return arr[23]; 102 | case 24: 103 | return arr[24]; 104 | case 25: 105 | return arr[25]; 106 | case 26: 107 | return arr[26]; 108 | case 27: 109 | return arr[27]; 110 | case 28: 111 | return arr[28]; 112 | case 29: 113 | return arr[29]; 114 | case 30: 115 | return arr[30]; 116 | case 31: 117 | return arr[31]; 118 | }; 119 | return T(); 120 | } 121 | 122 | __device__ __forceinline__ static void set(T arr[N], int idx, T val) { 123 | if (idx < 0 || idx >= N) 124 | return; 125 | switch (idx) { 126 | case 0: 127 | arr[0] = val; 128 | break; 129 | case 1: 130 | arr[1] = val; 131 | break; 132 | case 2: 133 | arr[2] = val; 134 | break; 135 | case 3: 136 | arr[3] = val; 137 | break; 138 | case 4: 139 | arr[4] = val; 140 | break; 141 | case 5: 142 | arr[5] = val; 143 | break; 144 | case 6: 145 | arr[6] = val; 146 | break; 147 | case 7: 148 | arr[7] = val; 149 | break; 150 | case 8: 151 | arr[8] = val; 152 | break; 153 | case 9: 154 | arr[9] = val; 155 | break; 156 | case 10: 157 | arr[10] = val; 158 | break; 159 | case 11: 160 | arr[11] = val; 161 | break; 162 | case 12: 163 | arr[12] = val; 164 | break; 165 | case 13: 166 | arr[13] = val; 167 | break; 168 | case 14: 169 | arr[14] = val; 170 | break; 171 | case 15: 172 | arr[15] = val; 173 | break; 174 | case 16: 175 | arr[16] = val; 176 | break; 177 | case 17: 178 | arr[17] = val; 179 | break; 180 | case 18: 181 | arr[18] = val; 182 | break; 183 | case 19: 184 | arr[19] = val; 185 | break; 186 | case 20: 187 | arr[20] = val; 188 | break; 189 | case 21: 190 | arr[21] = val; 191 | break; 192 | case 22: 193 | arr[22] = val; 194 | break; 195 | case 23: 196 | arr[23] = val; 197 | break; 198 | case 24: 199 | arr[24] = val; 200 | break; 201 | case 25: 202 | arr[25] = val; 203 | break; 204 | case 26: 205 | arr[26] = val; 206 | break; 207 | case 27: 208 | arr[27] = val; 209 | break; 210 | case 28: 211 | arr[28] = val; 212 | break; 213 | case 29: 214 | arr[29] = val; 215 | break; 216 | case 30: 217 | arr[30] = val; 218 | break; 219 | case 31: 220 | arr[31] = val; 221 | break; 222 | } 223 | } 224 | }; 225 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/mink.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree.
7 | */ 8 | 9 | #pragma once 10 | #define MINK_H 11 | 12 | #include "index_utils.cuh" 13 | 14 | // A data structure to keep track of the smallest K keys seen so far as well 15 | // as their associated values, intended to be used in device code. 16 | // This data structure doesn't allocate any memory; keys and values are stored 17 | // in arrays passed to the constructor. 18 | // 19 | // The implementation is generic; it can be used for any key type that supports 20 | // the < operator, and can be used with any value type. 21 | // 22 | // Example usage: 23 | // 24 | //    float keys[K]; 25 | //    int values[K]; 26 | //    MinK<float, int> mink(keys, values, K); 27 | //    for (...) { 28 | //      // Produce some key and value from somewhere 29 | //      mink.add(key, value); 30 | //    } 31 | //    mink.sort(); 32 | // 33 | // Now keys and values store the smallest K keys seen so far and the values 34 | // associated to these keys: 35 | // 36 | //    for (int k = 0; k < K; ++k) { 37 | //      float key_k = keys[k]; 38 | //      int value_k = values[k]; 39 | //    } 40 | template <typename key_t, typename value_t> 41 | class MinK { 42 | public: 43 | // Constructor. 44 | // 45 | // Arguments: 46 | //    keys: Array in which to store keys 47 | //    values: Array in which to store values 48 | //    K: How many values to keep track of 49 | __device__ MinK(key_t* keys, value_t* vals, int K) 50 | : keys(keys), vals(vals), K(K), _size(0) {} 51 | 52 | // Try to add a new key and associated value to the data structure. If the key 53 | // is one of the smallest K seen so far then it will be kept; otherwise it 54 | // will not be kept. 55 | // 56 | // This takes O(1) operations if the new key is not kept, or if the structure 57 | // currently contains fewer than K elements. Otherwise this takes O(K) time. 58 | // 59 | // Arguments: 60 | //    key: The key to add 61 | //    val: The value associated to the key 62 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) { 63 | if (_size < K) { 64 | keys[_size] = key; 65 | vals[_size] = val; 66 | if (_size == 0 || key > max_key) { 67 | max_key = key; 68 | max_idx = _size; 69 | } 70 | _size++; 71 | } else if (key < max_key) { 72 | keys[max_idx] = key; 73 | vals[max_idx] = val; 74 | max_key = key; 75 | for (int k = 0; k < K; ++k) { 76 | key_t cur_key = keys[k]; 77 | if (cur_key > max_key) { 78 | max_key = cur_key; 79 | max_idx = k; 80 | } 81 | } 82 | } 83 | } 84 | 85 | // Get the number of items currently stored in the structure. 86 | // This takes O(1) time. 87 | __device__ __forceinline__ int size() { 88 | return _size; 89 | } 90 | 91 | // Sort the items stored in the structure using bubble sort. 92 | // This takes O(K^2) time. 93 | __device__ __forceinline__ void sort() { 94 | for (int i = 0; i < _size - 1; ++i) { 95 | for (int j = 0; j < _size - i - 1; ++j) { 96 | if (keys[j + 1] < keys[j]) { 97 | key_t key = keys[j]; 98 | value_t val = vals[j]; 99 | keys[j] = keys[j + 1]; 100 | vals[j] = vals[j + 1]; 101 | keys[j + 1] = key; 102 | vals[j + 1] = val; 103 | } 104 | } 105 | } 106 | } 107 | 108 | private: 109 | key_t* keys; 110 | value_t* vals; 111 | int K; 112 | int _size; 113 | key_t max_key; 114 | int max_idx; 115 | }; 116 |
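Before the register-friendly variant below, it may help to see MinK's add() policy in plain Python: O(1) inserts while the buffer fills, then an O(K) rescan only when a new key evicts the current maximum. (A sketch; the CUDA version caches max_key/max_idx between calls, while this model rescans for brevity.)

```python
class MinKModel:
    """Pure-Python model of MinK.add() (keys: e.g. distances, vals: e.g. indices)."""
    def __init__(self, K):
        self.K, self.keys, self.vals = K, [], []
    def add(self, key, val):
        if len(self.keys) < self.K:
            self.keys.append(key); self.vals.append(val)   # O(1) while not full
        elif key < max(self.keys):
            i = self.keys.index(max(self.keys))            # O(K) rescan, as in the CUDA loop
            self.keys[i], self.vals[i] = key, val

mk = MinKModel(3)
for i, k in enumerate([9.0, 4.0, 7.0, 1.0, 8.0, 2.0]):
    mk.add(k, i)
print(sorted(zip(mk.keys, mk.vals)))   # [(1.0, 3), (2.0, 5), (4.0, 1)]
```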
117 | // This is a version of MinK that only touches the arrays using static indexing 118 | // via RegisterIndexUtils. If the keys and values are stored in thread-local 119 | // arrays, then this may allow the compiler to place them in registers for 120 | // fast access. 121 | // 122 | // This has the same API as MinK, but doesn't support sorting. 123 | // We found that sorting via RegisterIndexUtils gave very poor performance, 124 | // and suspect it may have prevented the compiler from placing the arrays 125 | // into registers. 126 | template <typename key_t, typename value_t, int K> 127 | class RegisterMinK { 128 | public: 129 | __device__ RegisterMinK(key_t* keys, value_t* vals) 130 | : keys(keys), vals(vals), _size(0) {} 131 | 132 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) { 133 | if (_size < K) { 134 | RegisterIndexUtils<key_t, K>::set(keys, _size, key); 135 | RegisterIndexUtils<value_t, K>::set(vals, _size, val); 136 | if (_size == 0 || key > max_key) { 137 | max_key = key; 138 | max_idx = _size; 139 | } 140 | _size++; 141 | } else if (key < max_key) { 142 | RegisterIndexUtils<key_t, K>::set(keys, max_idx, key); 143 | RegisterIndexUtils<value_t, K>::set(vals, max_idx, val); 144 | max_key = key; 145 | for (int k = 0; k < K; ++k) { 146 | key_t cur_key = RegisterIndexUtils<key_t, K>::get(keys, k); 147 | if (cur_key > max_key) { 148 | max_key = cur_key; 149 | max_idx = k; 150 | } 151 | } 152 | } 153 | } 154 | 155 | __device__ __forceinline__ int size() { 156 | return _size; 157 | } 158 | 159 | private: 160 | key_t* keys; 161 | value_t* vals; 162 | int _size; 163 | key_t max_key; 164 | int max_idx; 165 | }; 166 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/pytorch3d_cutils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | #include <torch/extension.h> 11 | 12 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.") 13 | #define CHECK_CONTIGUOUS(x) \ 14 |   TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.") 15 | #define CHECK_CONTIGUOUS_CUDA(x) \ 16 |   CHECK_CUDA(x);               \ 17 |   CHECK_CONTIGUOUS(x) 18 | --------------------------------------------------------------------------------