├── README.md
├── checkpoints
│   ├── init_deform_deform_cond_pe8.pth
│   └── script
│       ├── midpoint.mlx
│       ├── midpoint_head.mlx
│       ├── remesh.mlx
│       ├── remesh_bac.mlx
│       └── wt.mlx
├── configs
│   └── f3c.json
├── dataset
│   ├── __pycache__
│   │   ├── dataset.cpython-38.pyc
│   │   └── dataset_split.cpython-38.pyc
│   ├── dataset.py
│   └── dataset_split.py
├── deform
│   ├── __pycache__
│   │   └── smplx_exavatar_deformer.cpython-38.pyc
│   ├── smplx_exavatar
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── body_models.cpython-38.pyc
│   │   │   ├── lbs.cpython-38.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── vertex_ids.cpython-38.pyc
│   │   │   └── vertex_joint_selector.cpython-38.pyc
│   │   ├── body_models.py
│   │   ├── joint_names.py
│   │   ├── lbs.py
│   │   ├── utils.py
│   │   ├── vertex_ids.py
│   │   └── vertex_joint_selector.py
│   └── smplx_exavatar_deformer.py
├── denoiser
│   ├── __pycache__
│   │   └── denoiser.cpython-38.pyc
│   └── denoiser.py
├── figs
│   └── pipe.jpg
├── geometry
│   ├── __pycache__
│   │   ├── embedding.cpython-38.pyc
│   │   ├── gshell_tets.cpython-38.pyc
│   │   ├── hmsdf.cpython-38.pyc
│   │   ├── hmsdf_tets_split.cpython-38.pyc
│   │   └── mlp.cpython-38.pyc
│   ├── embedding.py
│   ├── gshell_tets.py
│   ├── hmsdf.py
│   ├── hmsdf_tets_split.py
│   └── mlp.py
├── lap_loss.py
├── render
│   ├── __pycache__
│   │   ├── light.cpython-38.pyc
│   │   ├── material.cpython-38.pyc
│   │   ├── mesh.cpython-38.pyc
│   │   ├── mlptexture.cpython-38.pyc
│   │   ├── obj.cpython-38.pyc
│   │   ├── regularizer.cpython-38.pyc
│   │   ├── render.cpython-38.pyc
│   │   ├── render_mask.cpython-38.pyc
│   │   ├── texture.cpython-38.pyc
│   │   └── util.cpython-38.pyc
│   ├── light.py
│   ├── material.py
│   ├── mesh.py
│   ├── mlptexture.py
│   ├── obj.py
│   ├── optixutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── build
│   │   │   ├── build.ninja
│   │   │   ├── denoising.cuda.o
│   │   │   ├── optix_wrapper.o
│   │   │   ├── optixutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── accessor.h
│   │   │   ├── bsdf.h
│   │   │   ├── common.h
│   │   │   ├── denoising.cu
│   │   │   ├── denoising.h
│   │   │   ├── envsampling
│   │   │   │   ├── kernel.cu
│   │   │   │   └── params.h
│   │   │   ├── math_utils.h
│   │   │   ├── optix_wrapper.cpp
│   │   │   ├── optix_wrapper.h
│   │   │   └── torch_bindings.cpp
│   │   ├── include
│   │   │   ├── internal
│   │   │   │   ├── optix_7_device_impl.h
│   │   │   │   ├── optix_7_device_impl_exception.h
│   │   │   │   └── optix_7_device_impl_transformations.h
│   │   │   ├── optix.h
│   │   │   ├── optix_7_device.h
│   │   │   ├── optix_7_host.h
│   │   │   ├── optix_7_types.h
│   │   │   ├── optix_denoiser_tiling.h
│   │   │   ├── optix_device.h
│   │   │   ├── optix_function_table.h
│   │   │   ├── optix_function_table_definition.h
│   │   │   ├── optix_host.h
│   │   │   ├── optix_stack_size.h
│   │   │   ├── optix_stubs.h
│   │   │   └── optix_types.h
│   │   ├── ops.py
│   │   └── tests
│   │       └── filter_test.py
│   ├── regularizer.py
│   ├── render.py
│   ├── render_mask.py
│   ├── renderutils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── bsdf.cpython-38.pyc
│   │   │   ├── loss.cpython-38.pyc
│   │   │   └── ops.cpython-38.pyc
│   │   ├── bsdf.py
│   │   ├── build
│   │   │   ├── bsdf.cuda.o
│   │   │   ├── build.ninja
│   │   │   ├── common.o
│   │   │   ├── cubemap.cuda.o
│   │   │   ├── loss.cuda.o
│   │   │   ├── mesh.cuda.o
│   │   │   ├── normal.cuda.o
│   │   │   ├── renderutils_plugin.so
│   │   │   └── torch_bindings.o
│   │   ├── c_src
│   │   │   ├── bsdf.cu
│   │   │   ├── bsdf.h
│   │   │   ├── common.cpp
│   │   │   ├── common.h
│   │   │   ├── cubemap.cu
│   │   │   ├── cubemap.h
│   │   │   ├── loss.cu
│   │   │   ├── loss.h
│   │   │   ├── mesh.cu
│   │   │   ├── mesh.h
│   │   │   ├── normal.cu
│   │   │   ├── normal.h
│   │   │   ├── tensor.h
│   │   │   ├── torch_bindings.cpp
│   │   │   ├── vec3f.h
│   │   │   └── vec4f.h
│   │   ├── loss.py
│   │   ├── ops.py
│   │   └── tests
│   │       ├── test_bsdf.py
│   │       ├── test_loss.py
│   │       ├── test_mesh.py
│   │       └── test_perf.py
│   ├── texture.py
│   └── util.py
├── script
│   ├── __pycache__
│   │   ├── connet_face_head.cpython-38.pyc
│   │   ├── get_tet_smpl.cpython-38.pyc
│   │   └── process_body_cloth_head_msdfcut.cpython-38.pyc
│   ├── connet_face_head.py
│   ├── get_tet_smpl.py
│   └── process_body_cloth_head_msdfcut.py
├── ssim_loss.py
├── third_parties
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── lpips
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-38.pyc
│   │   ├── lpips.py
│   │   ├── pretrained_networks.py
│   │   ├── trainer.py
│   │   └── weights
│   │       ├── v0.0
│   │       │   ├── alex.pth
│   │       │   ├── squeeze.pth
│   │       │   └── vgg.pth
│   │       └── v0.1
│   │           ├── alex.pth
│   │           ├── squeeze.pth
│   │           └── vgg.pth
│   └── pytorch3d
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   └── ops.cpython-38.pyc
│       ├── cuda
│       │   ├── knn.cpp
│       │   ├── knn.cu
│       │   ├── knn_cpu.cpp
│       │   └── utils
│       │       ├── dispatch.cuh
│       │       ├── index_utils.cuh
│       │       ├── mink.cuh
│       │       └── pytorch3d_cutils.h
│       └── ops.py
└── train.py

/README.md:
--------------------------------------------------------------------------------
# D3-Human: Dynamic Disentangled Digital Human from Monocular Video

PyTorch implementation of the paper "D3-Human: Dynamic Disentangled Digital Human from Monocular Video". This repository contains the reconstruction code and data.

**|[Project Page](https://ustc3dv.github.io/D3Human/)|** **|[Paper](https://arxiv.org/html/2501.01589v1)|**

This method reconstructs disentangled garment and body geometry from monocular videos.

## Pipeline
D3-Human takes a monocular video of a clothed human as input and reconstructs the garment and the body as disentangled geometry: the two are recovered as separate surfaces, so each can be deformed and processed independently.

![pipeline](figs/pipe.jpg)

## Setup

This code has been tested on a Tesla V100.

Environment:
* Ubuntu 20.04
* python 3.8.19

Run the following:
```
pip install ninja imageio PyOpenGL glfw xatlas gdown
pip install git+https://github.com/NVlabs/nvdiffrast/
pip install --global-option="--no-networks" git+https://github.com/NVlabs/tiny-cuda-nn#subdirectory=bindings/torch
```

Download the female SMPL-X model from https://smpl-x.is.tue.mpg.de/ and place it in the `./smplx` folder.

Download the preprocessed data from [here](https://drive.google.com/drive/folders/1-OY5X7pnt45XBMURVTM55xhOrKKUi7BX?usp=sharing) and place it in the `./data` folder.

## Reconstruction

Run the following command to reconstruct:

```
CUDA_VISIBLE_DEVICES=0 python train.py -o res/f3c --folder_name female-3-casual --config configs/f3c.json
```

## Dataset Preparation
If you wish to reconstruct your own monocular video, you can use [ExAvatar](https://github.com/mks0601/ExAvatar_RELEASE) to obtain SMPL-X coefficients and camera parameters, [Sapiens](https://github.com/facebookresearch/sapiens) to obtain normals, and [SAM2](https://github.com/facebookresearch/sam2) to obtain masks for the garments, the body, and the fully clothed human.

## Notes
If MeshLab cannot be executed from the command line, you can manually perform the remeshing and watertight processing (the `.mlx` scripts in `checkpoints/script`) within the MeshLab GUI.
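For reference, command-line processing with the bundled `.mlx` scripts typically looks like the following (a sketch assuming `meshlabserver` is installed and on your `PATH`; the mesh file names are placeholders):

```
meshlabserver -i mesh.obj -o mesh_remeshed.obj -s checkpoints/script/remesh.mlx
meshlabserver -i mesh_remeshed.obj -o mesh_wt.obj -s checkpoints/script/wt.mlx
```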
## Citation

If you find our paper useful for your work, please cite:

```
@inproceedings{Chen2024D3human,
  author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
  title     = {D$^3$-Human: Dynamic Disentangled Digital Human from Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2025}
}
```

## Contact
For further questions, please contact honghuc@mail.ustc.edu.cn

## Acknowledgement

Our data is processed with the help of [G-Shell](https://github.com/lzzcd001/GShell):
```
@inproceedings{Liu2024gshell,
  title={Ghost on the Shell: An Expressive Representation of General 3D Shapes},
  author={Liu, Zhen and Feng, Yao and Xiu, Yuliang and Liu, Weiyang
          and Paull, Liam and Black, Michael J and Sch{\"o}lkopf, Bernhard},
  booktitle={ICLR},
  year={2024}
}
```
--------------------------------------------------------------------------------
/checkpoints/init_deform_deform_cond_pe8.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/checkpoints/init_deform_deform_cond_pe8.pth
--------------------------------------------------------------------------------
/checkpoints/script/midpoint.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/midpoint_head.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/remesh.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/remesh_bac.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/checkpoints/script/wt.mlx:
--------------------------------------------------------------------------------
(MeshLab XML filter script; tag content not captured in this dump)
--------------------------------------------------------------------------------
/configs/f3c.json:
--------------------------------------------------------------------------------
{
    "ref_mesh": "data/spot/spot.obj",
    "random_textures": true,
    "iter": 10000,
    "save_interval": 100,
    "save_interval_fine": 100,
    "texture_res": [ 1080, 1080 ],
    "train_res": [1080, 1080],
    "batch": 1,
    "learning_rate": [0.03, 0.005],
    "ks_min" : [0, 0.001, 0.0],
    "ks_max" : [0, 1.0, 1.0],
    "lock_pos" : false,
    "display": [{"latlong" : true}],
    "background" : "white",
    "denoiser": "bilateral",
    "n_samples" : 24,
    "env_scale" : 2.0,
    "gshell_grid" : 128,
    "validate" : true,
    "laplace_scale" : 6000,
    "boxscale": [1, 1, 1],
    "aabb": [-1, -1, -1, 1, 1, 1]
}
--------------------------------------------------------------------------------
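A minimal sketch of how a flat JSON config like `configs/f3c.json` above can be consumed (the actual parsing lives in `train.py` and may differ; the `defaults` dict here is illustrative):

```
import json

def load_flags(path, defaults=None):
    """Read a flat JSON config such as configs/f3c.json and overlay it on defaults."""
    flags = dict(defaults or {})
    with open(path) as f:
        flags.update(json.load(f))
    return flags

flags = load_flags("configs/f3c.json", defaults={"batch": 1, "validate": False})
print(flags["train_res"], flags["gshell_grid"])  # [1080, 1080] 128
```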
/dataset/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/dataset_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/dataset/__pycache__/dataset_split.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/__pycache__/smplx_exavatar_deformer.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from .body_models import (
    create,
    SMPL,
    SMPLH,
    SMPLX,
    MANO,
    FLAME,
    build_layer,
    SMPLLayer,
    SMPLHLayer,
    SMPLXLayer,
    MANOLayer,
    FLAMELayer,
)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/body_models.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/lbs.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_ids.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/deform/smplx_exavatar/__pycache__/vertex_joint_selector.cpython-38.pyc
--------------------------------------------------------------------------------
/deform/smplx_exavatar/joint_names.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

import numpy as np

JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist", "jaw",
    "left_eye_smplhf", "right_eye_smplhf",
    "left_index1", "left_index2", "left_index3",
    "left_middle1", "left_middle2", "left_middle3",
    "left_pinky1", "left_pinky2", "left_pinky3",
    "left_ring1", "left_ring2", "left_ring3",
    "left_thumb1", "left_thumb2", "left_thumb3",
    "right_index1", "right_index2", "right_index3",
    "right_middle1", "right_middle2", "right_middle3",
    "right_pinky1", "right_pinky2", "right_pinky3",
    "right_ring1", "right_ring2", "right_ring3",
    "right_thumb1", "right_thumb2", "right_thumb3",
    "nose", "right_eye", "left_eye", "right_ear", "left_ear",
    "left_big_toe", "left_small_toe", "left_heel",
    "right_big_toe", "right_small_toe", "right_heel",
    "left_thumb", "left_index", "left_middle", "left_ring", "left_pinky",
    "right_thumb", "right_index", "right_middle", "right_ring", "right_pinky",
    "right_eye_brow1", "right_eye_brow2", "right_eye_brow3", "right_eye_brow4", "right_eye_brow5",
    "left_eye_brow5", "left_eye_brow4", "left_eye_brow3", "left_eye_brow2", "left_eye_brow1",
    "nose1", "nose2", "nose3", "nose4",
    "right_nose_2", "right_nose_1", "nose_middle", "left_nose_1", "left_nose_2",
    "right_eye1", "right_eye2", "right_eye3", "right_eye4", "right_eye5", "right_eye6",
    "left_eye4", "left_eye3", "left_eye2", "left_eye1", "left_eye6", "left_eye5",
    "right_mouth_1", "right_mouth_2", "right_mouth_3", "mouth_top",
    "left_mouth_3", "left_mouth_2", "left_mouth_1",
    "left_mouth_5",  # 59 in OpenPose output
    "left_mouth_4",  # 58 in OpenPose output
    "mouth_bottom", "right_mouth_4", "right_mouth_5",
    "right_lip_1", "right_lip_2", "lip_top", "left_lip_2", "left_lip_1",
    "left_lip_3", "lip_bottom", "right_lip_3",
    # Face contour
    "right_contour_1", "right_contour_2", "right_contour_3", "right_contour_4",
    "right_contour_5", "right_contour_6", "right_contour_7", "right_contour_8",
    "contour_middle",
    "left_contour_8", "left_contour_7", "left_contour_6", "left_contour_5",
    "left_contour_4", "left_contour_3", "left_contour_2", "left_contour_1",
]


SMPLH_JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist",
    "left_index1", "left_index2", "left_index3",
    "left_middle1", "left_middle2", "left_middle3",
    "left_pinky1", "left_pinky2", "left_pinky3",
    "left_ring1", "left_ring2", "left_ring3",
    "left_thumb1", "left_thumb2", "left_thumb3",
    "right_index1", "right_index2", "right_index3",
    "right_middle1", "right_middle2", "right_middle3",
    "right_pinky1", "right_pinky2", "right_pinky3",
    "right_ring1", "right_ring2", "right_ring3",
    "right_thumb1", "right_thumb2", "right_thumb3",
    "nose", "right_eye", "left_eye", "right_ear", "left_ear",
    "left_big_toe", "left_small_toe", "left_heel",
    "right_big_toe", "right_small_toe", "right_heel",
    "left_thumb", "left_index", "left_middle", "left_ring", "left_pinky",
    "right_thumb", "right_index", "right_middle", "right_ring", "right_pinky",
]

SMPL_JOINT_NAMES = [
    "pelvis", "left_hip", "right_hip", "spine1", "left_knee", "right_knee",
    "spine2", "left_ankle", "right_ankle", "spine3", "left_foot", "right_foot",
    "neck", "left_collar", "right_collar", "head", "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hand", "right_hand",
]


class Body:
    """
    Class for storing a single body pose.
    """

    def __init__(self, joints, joint_names):
        assert joints.ndim > 1
        assert joints.shape[0] == len(joint_names)
        self.joints = {}
        for i, j in enumerate(joint_names):
            self.joints[j] = joints[i]

    @staticmethod
    def from_smpl(joints):
        """
        Create a Body object from SMPL joints.
        """
        return Body(joints, SMPL_JOINT_NAMES)

    @staticmethod
    def from_smplh(joints):
        """
        Create a Body object from SMPLH joints.
        """
        return Body(joints, SMPLH_JOINT_NAMES)

    def _as(self, joint_names):
        """
        Return the joints stacked in the order given by joint_names;
        missing joints are filled with zeros.
        """
        joint_list = []
        for j in joint_names:
            if j not in self.joints:
                joint_list.append(np.zeros_like(self.joints["spine1"]))
            else:
                joint_list.append(self.joints[j])
        return np.stack(joint_list, axis=0)

    def as_smpl(self):
        """
        Convert the body to SMPL joints.
        """
        return self._as(SMPL_JOINT_NAMES)

    def as_smplh(self):
        """
        Convert the body to SMPLH joints.
        """
        return self._as(SMPLH_JOINT_NAMES)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from typing import NewType, Union, Optional
from dataclasses import dataclass, asdict, fields
import numpy as np
import torch

Tensor = NewType('Tensor', torch.Tensor)
Array = NewType('Array', np.ndarray)


@dataclass
class ModelOutput:
    vertices: Optional[Tensor] = None
    joints: Optional[Tensor] = None
    full_pose: Optional[Tensor] = None
    global_orient: Optional[Tensor] = None
    transl: Optional[Tensor] = None
    v_shaped: Optional[Tensor] = None

    def __getitem__(self, key):
        return getattr(self, key)

    def get(self, key, default=None):
        return getattr(self, key, default)

    def __iter__(self):
        return self.keys()

    def keys(self):
        keys = [t.name for t in fields(self)]
        return iter(keys)

    def values(self):
        values = [getattr(self, t.name) for t in fields(self)]
        return iter(values)

    def items(self):
        data = [(t.name, getattr(self, t.name)) for t in fields(self)]
        return iter(data)


@dataclass
class SMPLOutput(ModelOutput):
    betas: Optional[Tensor] = None
    body_pose: Optional[Tensor] = None


@dataclass
class SMPLHOutput(SMPLOutput):
    left_hand_pose: Optional[Tensor] = None
    right_hand_pose: Optional[Tensor] = None
    transl: Optional[Tensor] = None


@dataclass
class SMPLXOutput(SMPLHOutput):
    expression: Optional[Tensor] = None
    jaw_pose: Optional[Tensor] = None


@dataclass
class MANOOutput(ModelOutput):
    betas: Optional[Tensor] = None
    hand_pose: Optional[Tensor] = None


@dataclass
class FLAMEOutput(ModelOutput):
    betas: Optional[Tensor] = None
    expression: Optional[Tensor] = None
    jaw_pose: Optional[Tensor] = None
    neck_pose: Optional[Tensor] = None


def find_joint_kin_chain(joint_id, kinematic_tree):
    kin_chain = []
    curr_idx = joint_id
    while curr_idx != -1:
        kin_chain.append(curr_idx)
        curr_idx = kinematic_tree[curr_idx]
    return kin_chain


def to_tensor(
    array: Union[Array, Tensor], dtype=torch.float32
) -> Tensor:
    if torch.is_tensor(array):
        return array
    else:
        return torch.tensor(array, dtype=dtype)


class Struct(object):
    def __init__(self, **kwargs):
        for key, val in kwargs.items():
            setattr(self, key, val)


def to_np(array, dtype=np.float32):
    if 'scipy.sparse' in str(type(array)):
        array = array.todense()
    return np.array(array, dtype=dtype)


def rot_mat_to_euler(rot_mats):
    # Convert rotation matrices to Euler angles.
    # Careful for extreme cases of Euler angles like [0.0, pi, 0.0]

    sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] +
                    rot_mats[:, 1, 0] * rot_mats[:, 1, 0])
    return torch.atan2(-rot_mats[:, 2, 0], sy)
--------------------------------------------------------------------------------
/deform/smplx_exavatar/vertex_ids.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

# Joint name to vertex mapping. SMPL/SMPL-H/SMPL-X vertices that correspond to
# MSCOCO and OpenPose joints
vertex_ids = {
    'smplh': {
        'nose':      332,
        'reye':      6260,
        'leye':      2800,
        'rear':      4071,
        'lear':      583,
        'rthumb':    6191,
        'rindex':    5782,
        'rmiddle':   5905,
        'rring':     6016,
        'rpinky':    6133,
        'lthumb':    2746,
        'lindex':    2319,
        'lmiddle':   2445,
        'lring':     2556,
        'lpinky':    2673,
        'LBigToe':   3216,
        'LSmallToe': 3226,
        'LHeel':     3387,
        'RBigToe':   6617,
        'RSmallToe': 6624,
        'RHeel':     6787
    },
    'smplx': {
        'nose':      9120,
        'reye':      9929,
        'leye':      9448,
        'rear':      616,
        'lear':      6,
        'rthumb':    8079,
        'rindex':    7669,
        'rmiddle':   7794,
        'rring':     7905,
        'rpinky':    8022,
        'lthumb':    5361,
        'lindex':    4933,
        'lmiddle':   5058,
        'lring':     5169,
        'lpinky':    5286,
        'LBigToe':   5770,
        'LSmallToe': 5780,
        'LHeel':     8846,
        'RBigToe':   8463,
        'RSmallToe': 8474,
        'RHeel':     8635
    },
    'mano': {
        'thumb':  744,
        'index':  320,
        'middle': 443,
        'ring':   554,
        'pinky':  671,
    }
}
--------------------------------------------------------------------------------
/deform/smplx_exavatar/vertex_joint_selector.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np

import torch
import torch.nn as nn

from .utils import to_tensor


class VertexJointSelector(nn.Module):

    def __init__(self, vertex_ids=None,
                 use_hands=True,
                 use_feet_keypoints=True, **kwargs):
        super(VertexJointSelector, self).__init__()

        extra_joints_idxs = []

        face_keyp_idxs = np.array([
            vertex_ids['nose'],
            vertex_ids['reye'],
            vertex_ids['leye'],
            vertex_ids['rear'],
            vertex_ids['lear']], dtype=np.int64)

        extra_joints_idxs = np.concatenate([extra_joints_idxs,
                                            face_keyp_idxs])

        if use_feet_keypoints:
            feet_keyp_idxs = np.array([vertex_ids['LBigToe'],
                                       vertex_ids['LSmallToe'],
                                       vertex_ids['LHeel'],
                                       vertex_ids['RBigToe'],
                                       vertex_ids['RSmallToe'],
                                       vertex_ids['RHeel']], dtype=np.int32)

            extra_joints_idxs = np.concatenate(
                [extra_joints_idxs, feet_keyp_idxs])

        if use_hands:
            self.tip_names = ['thumb', 'index', 'middle', 'ring', 'pinky']

            tips_idxs = []
            for hand_id in ['l', 'r']:
                for tip_name in self.tip_names:
                    tips_idxs.append(vertex_ids[hand_id + tip_name])

            extra_joints_idxs = np.concatenate(
                [extra_joints_idxs, tips_idxs])

        self.register_buffer('extra_joints_idxs',
                             to_tensor(extra_joints_idxs, dtype=torch.long))

    def forward(self, vertices, joints):
        # The '.to(torch.long)' is added to make the trace work in C++;
        # otherwise you get a runtime error in C++:
        # 'index_select(): Expected dtype int32 or int64 for index'
        extra_joints = torch.index_select(vertices, 1, self.extra_joints_idxs.to(torch.long))
        joints = torch.cat([joints, extra_joints], dim=1)

        return joints
--------------------------------------------------------------------------------
/denoiser/__pycache__/denoiser.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/denoiser/__pycache__/denoiser.cpython-38.pyc
--------------------------------------------------------------------------------
/denoiser/denoiser.py:
--------------------------------------------------------------------------------
import os

import torch
import numpy as np
import math

from render import util
if "TWOSIDED_TEXTURE" not in os.environ or os.environ["TWOSIDED_TEXTURE"] == "True":
    from render import optixutils as ou
else:
    from render import optixutils_single_sided as ou


###############################################################################
# Bilateral denoiser
#
# Loosely based on SVGF, but removing temporal components and variance stopping guides.
# https://research.nvidia.com/publication/2017-07_spatiotemporal-variance-guided-filtering-real-time-reconstruction-path-traced
###############################################################################

class BilateralDenoiser(torch.nn.Module):
    def __init__(self, influence=1.0):
        super(BilateralDenoiser, self).__init__()
        self.set_influence(influence)

    def set_influence(self, factor):
        self.sigma = max(factor * 2, 0.0001)
        self.variance = self.sigma**2.
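        # A half-width of 2.5*sigma captures ~99% of the Gaussian mass;
        # N below is the corresponding odd filter footprint in pixels.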
        self.N = 2 * math.ceil(self.sigma * 2.5) + 1

    def forward(self, input):
        col = input[..., 0:3]
        nrm = util.safe_normalize(input[..., 3:6])  # Bent normals can produce normals of length < 1 here
        zdz = input[..., 6:8]
        return ou.bilateral_denoiser(col, nrm, zdz, self.sigma)
--------------------------------------------------------------------------------
/figs/pipe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/figs/pipe.jpg
--------------------------------------------------------------------------------
/geometry/__pycache__/embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/embedding.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/gshell_tets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/gshell_tets.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/hmsdf_tets_split.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/__pycache__/mlp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/geometry/__pycache__/mlp.cpython-38.pyc
--------------------------------------------------------------------------------
/geometry/embedding.py:
--------------------------------------------------------------------------------
import torch
from torch import nn

class Embedding(nn.Module):
    def __init__(self, in_channels, N_freqs, logscale=True):
        """
        Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
        in_channels: number of input channels (3 for both xyz and direction)
        """
        super(Embedding, self).__init__()
        self.N_freqs = N_freqs
        self.in_channels = in_channels
        self.funcs = [torch.sin, torch.cos]
        self.out_channels = in_channels*(len(self.funcs)*N_freqs+1)

        if logscale:
            self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs)
        else:
            self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs)

    def forward(self, x):
        """
        Embeds x to (x, sin(2^k x), cos(2^k x), ...)
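        (e.g. in_channels=3 with N_freqs=8 yields 3 * (2*8 + 1) = 51 output channels)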
        Different from the paper, "x" is also in the output
        See https://github.com/bmild/nerf/issues/12

        Inputs:
            x: (B, self.in_channels)

        Outputs:
            out: (B, self.out_channels)
        """
        out = [x]
        for freq in self.freq_bands:
            for func in self.funcs:
                out += [func(freq*x)]

        return torch.cat(out, -1)
--------------------------------------------------------------------------------
/lap_loss.py:
--------------------------------------------------------------------------------
import torch


def compute_body_laplacian(self):
    edges_packed = self._body_edges.clone().detach()
    body_verts = self.get_xyz[:self.body_verts_num]
    V = body_verts.shape[0]

    e0, e1 = edges_packed.unbind(1)

    idx01 = torch.stack([e0, e1], dim=1)  # (sum(E_n), 2)
    idx10 = torch.stack([e1, e0], dim=1)  # (sum(E_n), 2)
    idx = torch.cat([idx01, idx10], dim=0).t()  # (2, 2*sum(E_n))

    # First, we construct the adjacency matrix,
    # i.e. A[i, j] = 1 if (i,j) is an edge, or
    # A[e0, e1] = 1 & A[e1, e0] = 1
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
    A = torch.sparse.FloatTensor(idx, ones, (V, V))

    # the sum of the i-th row of A gives the degree of the i-th vertex
    deg = torch.sparse.sum(A, dim=1).to_dense()

    # We construct the Laplacian matrix by adding the non diagonal values
    # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge
    deg0 = deg[e0]
    deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0)
    deg1 = deg[e1]
    deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1)
    val = torch.cat([deg0, deg1])
    L = torch.sparse.FloatTensor(idx, val, (V, V))

    # Then we add the diagonal values L[i, i] = -1.
    idx = torch.arange(V, device=self._xyz.device)
    idx = torch.stack([idx, idx], dim=0)
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=self._xyz.device)
    L -= torch.sparse.FloatTensor(idx, ones, (V, V))
    self.body_laplacian = L

def body_laplacian_loss(mesh):
    L = mesh.laplacian
    V = mesh.v_pos

    loss = L.mm(V)
    loss = loss.norm(dim=1)**2
    return loss.mean()


def body_normal_loss(mesh):
    # loss = 1 - torch.cosine_similarity(mesh.face_normals[mesh.connected_faces[:, 0]], mesh.face_normals[mesh.connected_faces[:, 1]], dim=1)
    return mesh.normal_consistency()

def find_edges(indices, remove_duplicates=True):
    # Extract the three edges (in terms of vertex indices) for each face
    # edges_0 = [f0_e0, ..., fN_e0]
    # edges_1 = [f0_e1, ..., fN_e1]
    # edges_2 = [f0_e2, ..., fN_e2]
    edges_0 = torch.index_select(indices, 1, torch.tensor([0,1], device=indices.device))
    edges_1 = torch.index_select(indices, 1, torch.tensor([1,2], device=indices.device))
    edges_2 = torch.index_select(indices, 1, torch.tensor([2,0], device=indices.device))

    # Merge them into one tensor so that the three edges of one face appear sequentially
    # edges = [f0_e0, f0_e1, f0_e2, ..., fN_e0, fN_e1, fN_e2]
    edges = torch.cat([edges_0, edges_1, edges_2], dim=1).view(indices.shape[0] * 3, -1)

    if remove_duplicates:
        edges, _ = torch.sort(edges, dim=1)
        edges = torch.unique(edges, dim=0)

    return edges

def find_connected_faces(indices):
    edges = find_edges(indices, remove_duplicates=False)

    # Make sure that two edges that share the same vertices have the vertex ids appear in the same order
    edges, _ = torch.sort(edges, dim=1)

    # Now find edges that share the same vertices and make sure there are only manifold edges
    _, inverse_indices, counts = torch.unique(edges, dim=0, sorted=False, return_inverse=True, return_counts=True)

    # print("counts.max():", counts.max())
    assert counts.max() == 2

    # We now create a tensor that contains corresponding faces.
    # If the faces with ids fi and fj share the same edge, the tensor contains them as
    # [..., [fi, fj], ...]
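    # Note: the loop below runs on the CPU and visits every edge exactly once;
    # each manifold edge (counts == 2) ends up with the two face ids that share it.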
    face_ids = torch.arange(indices.shape[0])
    face_ids = torch.repeat_interleave(face_ids, 3, dim=0)  # Tensor with the face id for each edge

    face_correspondences = torch.zeros((counts.shape[0], 2), dtype=torch.int64)
    face_correspondences_indices = torch.zeros(counts.shape[0], dtype=torch.int64)

    # ei = edge index
    for ei, ei_unique in enumerate(list(inverse_indices.cpu().numpy())):
        face_correspondences[ei_unique, face_correspondences_indices[ei_unique]] = face_ids[ei]
        face_correspondences_indices[ei_unique] += 1

    face_correspondences = face_correspondences.cuda()

    return face_correspondences[counts == 2].to(device=indices.device), edges
--------------------------------------------------------------------------------
/render/__pycache__/light.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/light.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/material.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/material.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mesh.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mesh.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/mlptexture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/mlptexture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/obj.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/obj.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/regularizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/regularizer.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/render_mask.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/render_mask.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/texture.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/texture.cpython-38.pyc
--------------------------------------------------------------------------------
/render/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/render/light.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import os
import numpy as np
import torch
import nvdiffrast.torch as dr

from . import util
from . import renderutils as ru

######################################################################################
# Monte-carlo sampled environment light with PDF / CDF computation
######################################################################################

class EnvironmentLight:
    LIGHT_MIN_RES = 16

    MIN_ROUGHNESS = 0.08
    MAX_ROUGHNESS = 0.5

    def __init__(self, base):
        self.mtx = None
        self.base = base

        self.pdf_scale = (self.base.shape[0] * self.base.shape[1]) / (2 * np.pi * np.pi)
        self.update_pdf()

    def xfm(self, mtx):
        self.mtx = mtx

    def parameters(self):
        return [self.base]

    def clone(self):
        return EnvironmentLight(self.base.clone().detach())

    def clamp_(self, min=None, max=None):
        self.base.clamp_(min, max)

    def update_pdf(self):
        with torch.no_grad():
            # Compute PDF
            Y = util.pixel_grid(self.base.shape[1], self.base.shape[0])[..., 1]
            self._pdf = torch.max(self.base, dim=-1)[0] * torch.sin(Y * np.pi)  # Scale by sin(theta) for lat-long, https://cs184.eecs.berkeley.edu/sp18/article/25
            self._pdf = self._pdf / torch.sum(self._pdf)

            # Compute cumulative sums over the columns and rows
            self.cols = torch.cumsum(self._pdf, dim=1)
            self.rows = torch.cumsum(self.cols[:, -1:].repeat([1, self.cols.shape[1]]), dim=0)

            # Normalize
            self.cols = self.cols / torch.where(self.cols[:, -1:] > 0, self.cols[:, -1:], torch.ones_like(self.cols))
            self.rows = self.rows / torch.where(self.rows[-1:, :] > 0, self.rows[-1:, :], torch.ones_like(self.rows))

    @torch.no_grad()
    def generate_image(self, res):
        texcoord = util.pixel_grid(res[1], res[0])
        return dr.texture(self.base[None, ...].contiguous(), texcoord[None, ...].contiguous(), filter_mode='linear')[0]

######################################################################################
# Load and store
######################################################################################

@torch.no_grad()
def _load_env_hdr(fn, scale=1.0, res=None, trainable=False):
    latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')*scale

    if res is not None:
        texcoord = util.pixel_grid(res[1], res[0])
        latlong_img = torch.clamp(dr.texture(latlong_img[None, ...], texcoord[None, ...], filter_mode='linear')[0], min=0.0001)

    print("EnvProbe,", latlong_img.shape, ", min/max", torch.min(latlong_img).item(), torch.max(latlong_img).item())
    if trainable:
        print("trainable light loaded")
        return EnvironmentLight(base=latlong_img.clone().detach().requires_grad_(True))
    else:
        return EnvironmentLight(base=latlong_img)

@torch.no_grad()
def load_env(fn, scale=1.0, res=None, trainable=False):
    if os.path.splitext(fn)[1].lower() == ".hdr":
        return _load_env_hdr(fn, scale, res, trainable=trainable)
    else:
        assert False, "Unknown envlight extension %s" % os.path.splitext(fn)[1]

@torch.no_grad()
def save_env_map(fn, light):
    assert isinstance(light, EnvironmentLight)
    color = light.generate_image([512, 1024])
    util.save_image_raw(fn, color.detach().cpu().numpy())

######################################################################################
# Create trainable with random initialization
######################################################################################

def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
    base = torch.rand(base_res, base_res, 3, dtype=torch.float32, device='cuda') * scale + bias
    l = EnvironmentLight(base.clone().detach().requires_grad_(True))
    return l
--------------------------------------------------------------------------------
/render/material.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import os
import numpy as np
import torch

from . import util
from . import texture
from . import mlptexture

######################################################################################
# .mtl material format loading / storing
######################################################################################

def load_mtl(fn, clear_ks=True):
    import re
    mtl_path = os.path.dirname(fn)

    # Read file
    with open(fn, 'r') as f:
        lines = f.readlines()

    # Parse materials
    materials = []
    for line in lines:
        split_line = re.split(' +|\t+|\n+', line.strip())
        prefix = split_line[0].lower()
        data = split_line[1:]
        if 'newmtl' in prefix:
            material = {'name' : data[0]}
            materials += [material]
        elif materials:
            if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
                material[prefix] = data[0]
            else:
                material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')

    # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps, so replace constants with 1x1 maps
    for mat in materials:
        if not 'bsdf' in mat:
            mat['bsdf'] = 'pbr'

        if 'map_kd' in mat:
            mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
        else:
            mat['kd'] = texture.Texture2D(mat['kd'])

        if 'map_ks' in mat:
            mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
        else:
            mat['ks'] = texture.Texture2D(mat['ks'])

        if 'bump' in mat:
            mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)

        # Convert Kd from sRGB to linear RGB
        mat['kd'] = texture.srgb_to_rgb(mat['kd'])

        if clear_ks:
            # Override ORM occlusion (red) channel by zeros. We hijack this channel
            for mip in mat['ks'].getMips():
                mip[..., 0] = 0.0

    return materials

def save_mtl(fn, material):
    folder = os.path.dirname(fn)
    with open(fn, "w") as f:
        f.write('newmtl defaultMat\n')
        if material is not None:
            f.write('bsdf %s\n' % material['bsdf'])
            if 'kd' in material.keys():
                f.write('map_Kd texture_kd.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
            if 'ks' in material.keys():
                f.write('map_Ks texture_ks.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
            if 'normal' in material.keys():
                f.write('bump texture_n.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(util.safe_normalize(x)+1)*0.5)
        else:
            f.write('Kd 1 1 1\n')
            f.write('Ks 0 0 0\n')
            f.write('Ka 0 0 0\n')
            f.write('Tf 1 1 1\n')
            f.write('Ni 1\n')
            f.write('Ns 0\n')

######################################################################################
# Utility function to convert an existing material and make all textures trainable
######################################################################################

def create_trainable(material):
    result = material.copy()
    for key, val in result.items():
        if isinstance(val, texture.Texture2D):
            result[key] = texture.create_trainable(val)
    return result

def get_parameters(material):
    trainable = []
    for key, val in material.items():
        if isinstance(val, texture.Texture2D) or isinstance(val, mlptexture.MLPTexture3D):
            trainable += val.parameters()
    return trainable

######################################################################################
# Merge multiple materials into a single uber-material
######################################################################################

def _upscale_replicate(x, full_res):
    x = x.permute(0, 3, 1, 2)
    x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
    return x.permute(0, 2, 3, 1).contiguous()

def merge_materials(materials, texcoords, tfaces, mfaces):
    assert len(materials) > 0
    for mat in materials:
        assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
        assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"

    uber_material = {
        'name' : 'uber_material',
        'bsdf' : materials[0]['bsdf'],
    }

    textures = ['kd', 'ks', 'normal']

    # Find maximum texture resolution across all materials and textures
    max_res = None
    for mat in materials:
        for tex in textures:
            tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
            max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res

    # Compute size of compound texture and round up to nearest PoT
    full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)  # np.int was removed from recent NumPy; the builtin int is equivalent

    # Normalize texture resolution across all materials & combine into a single large texture
    for tex in textures:
        if tex in materials[0]:
            tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2)  # Lay out all textures horizontally, NHWC so dim2 is x
            tex_data = _upscale_replicate(tex_data, full_res)
            uber_material[tex] = texture.Texture2D(tex_data)

    # Compute scaling values for used / unused texture area
    s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]

    # Recompute texture coordinates to coincide with the new composite texture
    new_tverts = {}
    new_tverts_data = []
    for fi in range(len(tfaces)):
        matIdx = mfaces[fi]
        for vi in range(3):
            ti = tfaces[fi][vi]
            if not (ti in new_tverts):
                new_tverts[ti] = {}
            if not (matIdx in new_tverts[ti]):  # create new vertex
                new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]])  # Offset texture coordinate (x direction) by material id & scale to local space. Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here
                new_tverts[ti][matIdx] = len(new_tverts_data) - 1
            tfaces[fi][vi] = new_tverts[ti][matIdx]  # reindex vertex

    return uber_material, new_tverts_data, tfaces
--------------------------------------------------------------------------------
/render/mlptexture.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
9 | 10 | import torch 11 | import tinycudann as tcnn 12 | import numpy as np 13 | 14 | ####################################################################################################################################################### 15 | # Small MLP using PyTorch primitives, internal helper class 16 | ####################################################################################################################################################### 17 | 18 | class _MLP(torch.nn.Module): 19 | def __init__(self, cfg, loss_scale=1.0): 20 | super(_MLP, self).__init__() 21 | self.loss_scale = loss_scale 22 | net = (torch.nn.Linear(cfg['n_input_dims'], cfg['n_neurons'], bias=False), torch.nn.ReLU()) 23 | for i in range(cfg['n_hidden_layers']-1): 24 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_neurons'], bias=False), torch.nn.ReLU()) 25 | net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_output_dims'], bias=False),) 26 | self.net = torch.nn.Sequential(*net).cuda() 27 | 28 | self.net.apply(self._init_weights) 29 | 30 | if self.loss_scale != 1.0: 31 | self.net.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] * self.loss_scale, )) 32 | 33 | def forward(self, x): 34 | return self.net(x.to(torch.float32)) 35 | 36 | @staticmethod 37 | def _init_weights(m): 38 | if type(m) == torch.nn.Linear: 39 | torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu') 40 | if hasattr(m.bias, 'data'): 41 | m.bias.data.fill_(0.0) 42 | 43 | ############################################ 44 | 45 | 46 | 47 | ####################################################################################################################################################### 48 | # Outward visible MLP class 49 | ####################################################################################################################################################### 50 | 51 | class MLPTexture3D(torch.nn.Module): 52 | def __init__(self, AABB, channels = 3, internal_dims = 32, hidden = 2, min_max = None, use_float16=False): 53 | super(MLPTexture3D, self).__init__() 54 | 55 | self.channels = channels 56 | self.internal_dims = internal_dims 57 | self.AABB = AABB 58 | self.min_max = min_max 59 | self.use_float16 = use_float16 60 | 61 | # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details 62 | desired_resolution = 4096 63 | base_grid_resolution = 16 64 | num_levels = 16 65 | per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1)) 66 | 67 | 68 | enc_cfg = { 69 | "otype": "HashGrid", 70 | "n_levels": 5, # 16 71 | "n_features_per_level": 2, #2 72 | "log2_hashmap_size": 21, # 21 73 | "base_resolution": base_grid_resolution, # 16 74 | "per_level_scale" : per_level_scale # 1.4472692374403782 75 | } 76 | 77 | 78 | gradient_scaling = 128.0 79 | self.encoder = tcnn.Encoding(3, enc_cfg) 80 | 81 | # Setup MLP 82 | mlp_cfg = { 83 | "n_input_dims" : self.encoder.n_output_dims, 84 | "n_output_dims" : self.channels, 85 | "n_hidden_layers" : hidden, 86 | "n_neurons" : self.internal_dims 87 | } 88 | self.net = _MLP(mlp_cfg, gradient_scaling) 89 | print("Encoder output: %d dims" % (self.encoder.n_output_dims)) 90 | 91 | def sample(self, texc, frame_id): 92 | 93 | ################################### 94 | bbox = torch.tensor([0.6, 0.6, 0.2]).cuda(), torch.tensor([-0.8, -1.2, -0.2]).cuda() 95 | _texc = (texc.view(-1, 3) - bbox[0][None, ...]) / (bbox[1][None, ...] 
- bbox[0][None, ...]) 96 | _texc = torch.clamp(_texc, min=0, max=1) 97 | 98 | p_enc = self.encoder(_texc.contiguous()) 99 | 100 | with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16): 101 | out = self.net.forward(p_enc) 102 | 103 | # Sigmoid limit and scale to the allowed range 104 | out = torch.sigmoid(out) * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :] 105 | 106 | 107 | return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, c] 108 | 109 | 110 | # In-place clamp with no derivative; a no-op here, since sample() already bounds the output to [min_max[0], min_max[1]] via the sigmoid mapping 111 | def clamp_(self): 112 | pass 113 | 114 | def cleanup(self): 115 | tcnn.free_temporary_memory() 116 | 117 | 118 | 119 | class MeshTexture3D(torch.nn.Module): # incomplete stub: only wires kd/ks ranges from FLAGS into an MLPTexture3D 120 | def __init__(self, v): 121 | pass 122 | 123 | def __get_load_Texture3d(self, mesh, FLAGS): 124 | 125 | kd_min, kd_max = torch.tensor(FLAGS.kd_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.kd_max, dtype=torch.float32, device='cuda') 126 | ks_min, ks_max = torch.tensor(FLAGS.ks_min, dtype=torch.float32, device='cuda'), torch.tensor(FLAGS.ks_max, dtype=torch.float32, device='cuda') 127 | 128 | mlp_min = torch.cat((kd_min[0:3], ks_min), dim=0) 129 | mlp_max = torch.cat((kd_max[0:3], ks_max), dim=0) 130 | 131 | self.mlp_material = MLPTexture3D(mesh.getAABB(), channels=6, min_max=[mlp_min, mlp_max], use_float16=FLAGS.use_float16) 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /render/optixutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
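# The package exposes four entry points (all defined in ops.py): OptiXContext wraps the
# compiled plugin state, optix_build_bvh (re)builds the acceleration structure over a
# triangle mesh, and optix_env_shade / bilateral_denoiser are the autograd-wrapped kernels.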
8 | 9 | from .ops import OptiXContext, optix_build_bvh, optix_env_shade, bilateral_denoiser 10 | __all__ = ["OptiXContext", "optix_build_bvh", "optix_env_shade", 'bilateral_denoiser'] 11 | -------------------------------------------------------------------------------- /render/optixutils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /render/optixutils/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /render/optixutils/build/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH 6 | post_cflags = 7 | cuda_cflags = -DTORCH_EXTENSION_NAME=optixutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/nas_data/chh/D3Human_main/render/optixutils/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14 8 | cuda_post_cflags = 9 | cuda_dlink_post_cflags = 10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart 11 | 12 | rule compile 13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 14 | depfile = $out.d 15 | deps = gcc 16 | 17 | rule cuda_compile 18 | depfile = $out.d 19 | deps = gcc 20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 21 | 22 | 23 
| 24 | rule link 25 | command = $cxx $in $ldflags -o $out 26 | 27 | build denoising.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/optixutils/c_src/denoising.cu 28 | build optix_wrapper.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/optix_wrapper.cpp 29 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/optixutils/c_src/torch_bindings.cpp 30 | 31 | 32 | 33 | build optixutils_plugin.so: link denoising.cuda.o optix_wrapper.o torch_bindings.o 34 | 35 | default optixutils_plugin.so 36 | 37 | -------------------------------------------------------------------------------- /render/optixutils/build/denoising.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/denoising.cuda.o -------------------------------------------------------------------------------- /render/optixutils/build/optix_wrapper.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optix_wrapper.o -------------------------------------------------------------------------------- /render/optixutils/build/optixutils_plugin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/optixutils_plugin.so -------------------------------------------------------------------------------- /render/optixutils/build/torch_bindings.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/optixutils/build/torch_bindings.o -------------------------------------------------------------------------------- /render/optixutils/c_src/common.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | // Helper functions to do broadcast guarded fetches 12 | #if defined(__CUDACC__) 13 | template <typename T, typename U, typename... Args> 14 | static __device__ inline float3 fetch3(const T &tensor, U idx, Args... args) { 15 | return tensor.size(0) == 1 ? fetch3(tensor[0], args...) : fetch3(tensor[idx], args...); 16 | } 17 | template <typename T> static __device__ inline float3 fetch3(const T &tensor) { 18 | return tensor.size(0) == 1 ? make_float3(tensor[0], tensor[0], tensor[0]) : make_float3(tensor[0], tensor[1], tensor[2]); 19 | } 20 | 21 | template <typename T, typename U, typename... Args> 22 | static __device__ inline float2 fetch2(const T &tensor, U idx, Args... args) { 23 | return tensor.size(0) == 1 ? fetch2(tensor[0], args...) : fetch2(tensor[idx], args...); 24 | } 25 | template <typename T> static __device__ inline float2 fetch2(const T &tensor) { 26 | return tensor.size(0) == 1 ?
make_float2(tensor[0], tensor[0]) : make_float2(tensor[0], tensor[1]); 27 | } 28 | 29 | #include "math_utils.h" 30 | #include "bsdf.h" 31 | #endif 32 | 33 | //------------------------------------------------------------------------------ 34 | // CUDA error-checking macros 35 | //------------------------------------------------------------------------------ 36 | 37 | #define CUDA_CHECK( call ) \ 38 | do \ 39 | { \ 40 | cudaError_t error = call; \ 41 | if( error != cudaSuccess ) \ 42 | { \ 43 | std::stringstream ss; \ 44 | ss << "CUDA call (" << #call << " ) failed with error: '" \ 45 | << cudaGetErrorString( error ) \ 46 | << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \ 47 | } \ 48 | } while( 0 ) 49 | 50 | 51 | #define OPTIX_CHECK( call ) \ 52 | do \ 53 | { \ 54 | OptixResult res = call; \ 55 | if( res != OPTIX_SUCCESS ) \ 56 | { \ 57 | std::stringstream ss; \ 58 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \ 59 | << __LINE__ << ")\n"; \ 60 | } \ 61 | } while( 0 ) 62 | 63 | #define OPTIX_CHECK_LOG( call ) \ 64 | do \ 65 | { \ 66 | OptixResult res = call; \ 67 | const size_t sizeof_log_returned = sizeof_log; \ 68 | sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */ \ 69 | if( res != OPTIX_SUCCESS ) \ 70 | { \ 71 | std::stringstream ss; \ 72 | ss << "Optix call '" << #call << "' failed: " __FILE__ ":" \ 73 | << __LINE__ << ")\nLog:\n" << log \ 74 | << ( sizeof_log_returned > sizeof( log ) ? "<TRUNCATED>" : "" ) \ 75 | << "\n"; \ 76 | } \ 77 | } while( 0 ) 78 | 79 | #define NVRTC_CHECK_ERROR( func ) \ 80 | do \ 81 | { \ 82 | nvrtcResult code = func; \ 83 | if( code != NVRTC_SUCCESS ) \ 84 | throw std::runtime_error( "ERROR: " __FILE__ "(): " + std::string( nvrtcGetErrorString( code ) ) ); \ 85 | } while( 0 ) 86 | -------------------------------------------------------------------------------- /render/optixutils/c_src/denoising.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
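// Overview (a sketch of the math, written to match the kernels below rather than
// any external reference): each tap at offset (fx, fy) from the center pixel is
// weighted by w = w_xy * w_normal * w_depth, where
//   w_xy     = expf(-(fx*fx + fy*fy) / (2 * sigma^2))            // spatial Gaussian
//   w_normal = powf(clamp(dot(n_tap, n_center), eps, 1), 128)    // SVGF-style normal edge-stop
//   w_depth  = expf(-|z_tap - z_center| / max(dz * dist, eps))   // depth edge-stop
// The forward kernel accumulates weighted color in .xyz and the weight sum in .w so
// the Python wrapper can normalize; the backward kernel applies the same weights to
// the incoming output gradient.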
8 | 9 | #include "common.h" 10 | #include "denoising.h" 11 | 12 | #define FLT_EPS 0.0001f 13 | 14 | __global__ void bilateral_denoiser_fwd_kernel(BilateralDenoiserParams params) 15 | { 16 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z); 17 | 18 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2)) 19 | return; 20 | 21 | // Fetch central tap 22 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x); 23 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x); 24 | 25 | float variance = params.sigma * params.sigma; 26 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1; 27 | 28 | float accum_w = 0.0f; 29 | float3 accum_col = make_float3(0.0f); 30 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy) 31 | { 32 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx) 33 | { 34 | // Compute tap coordinates, used for input activations and bilateral guides 35 | int32_t y = idx.y + fy; 36 | int32_t x = idx.x + fx; 37 | 38 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2)) 39 | continue; 40 | 41 | // Fetch current tap 42 | float3 t_col = fetch3(params.col, idx.z, y, x); 43 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x); 44 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x); 45 | 46 | ///////////////////////////////////////////////////////// 47 | // Compute bilateral weight 48 | ///////////////////////////////////////////////////////// 49 | 50 | // Distance 51 | float dist_sqr = fx * fx + fy * fy; 52 | float dist = sqrtf(dist_sqr); 53 | float w_xy = expf(-dist_sqr / (2.0f * variance)); 54 | 55 | // Normal 56 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f); 57 | 58 | // Depth 59 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(c_zdz.y * dist, FLT_EPS))); 60 | 61 | float w = w_xy * w_normal * w_depth; 62 | 63 | accum_col = accum_col + t_col * w; 64 | accum_w += w; 65 | } 66 | } 67 | 68 | params.out[idx.z][idx.y][idx.x][0] = accum_col.x; 69 | params.out[idx.z][idx.y][idx.x][1] = accum_col.y; 70 | params.out[idx.z][idx.y][idx.x][2] = accum_col.z; 71 | params.out[idx.z][idx.y][idx.x][3] = max(accum_w, 0.0001f); 72 | } 73 | 74 | __global__ void bilateral_denoiser_bwd_kernel(BilateralDenoiserParams params) 75 | { 76 | uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z); 77 | 78 | if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2)) 79 | return; 80 | 81 | // Fetch central tap 82 | float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x); 83 | float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x); 84 | 85 | float variance = params.sigma * params.sigma; 86 | int filter_rad = 2 * ceil(params.sigma * 2.5) + 1; 87 | 88 | float3 accum_grad = make_float3(0.0f); 89 | for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy) 90 | { 91 | for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx) 92 | { 93 | // Compute tap coordinates, used for input activations and bilateral guides 94 | int32_t y = idx.y + fy; 95 | int32_t x = idx.x + fx; 96 | 97 | if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2)) 98 | continue; 99 | 100 | // Fetch current tap 101 | float3 t_col = fetch3(params.col, idx.z, y, x); 102 | float3 t_nrm = fetch3(params.nrm, idx.z, y, x); 103 | float2 t_zdz = fetch2(params.zdz, idx.z, y, x); 104 | 105 | ///////////////////////////////////////////////////////// 106 | 
// Compute bilateral weight 107 | ///////////////////////////////////////////////////////// 108 | 109 | // Distance, transposing fx & fy doesn't affect distance 110 | float dist_sqr = fx * fx + fy * fy; 111 | float dist = sqrtf(dist_sqr); 112 | float w_xy = expf(-dist_sqr / (2.0f * variance)); 113 | 114 | // Normal, transpose c_ and t_ (it's symmetric so doesn't matter) 115 | float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f); 116 | 117 | // Depth, transpose c_ and t_ (matters for the denominator) 118 | float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(t_zdz.y * dist, FLT_EPS))); 119 | 120 | float w = w_xy * w_normal * w_depth; 121 | 122 | float3 t_col_grad = w * fetch3(params.out_grad, idx.z, y, x); 123 | accum_grad += t_col_grad; 124 | } 125 | } 126 | 127 | params.col_grad[idx.z][idx.y][idx.x][0] = accum_grad.x; 128 | params.col_grad[idx.z][idx.y][idx.x][1] = accum_grad.y; 129 | params.col_grad[idx.z][idx.y][idx.x][2] = accum_grad.z; 130 | } 131 | -------------------------------------------------------------------------------- /render/optixutils/c_src/denoising.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | #include "accessor.h" 11 | 12 | struct BilateralDenoiserParams 13 | { 14 | PackedTensorAccessor32<float, 4> col; // [n, h, w, c], as indexed by the kernels above 15 | PackedTensorAccessor32<float, 4> col_grad; 16 | PackedTensorAccessor32<float, 4> nrm; 17 | PackedTensorAccessor32<float, 4> zdz; 18 | PackedTensorAccessor32<float, 4> out; 19 | PackedTensorAccessor32<float, 4> out_grad; 20 | float sigma; 21 | }; 22 | -------------------------------------------------------------------------------- /render/optixutils/c_src/envsampling/params.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
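// These fields line up one-to-one with the argument list passed from Python in
// render/optixutils/ops.py (env_shade_fwd / env_shade_bwd); the *_grad members
// are only populated in the backward pass.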
8 | 9 | #include "../accessor.h" 10 | // Accessor template arguments follow the tensor shapes passed in from ops.py (gbuffers are NHWC float); the exact layouts of the light-sampling CDFs are assumed here 11 | struct EnvSamplingParams 12 | { 13 | // Ray data 14 | PackedTensorAccessor32<float, 2> ro; // ray origin 15 | 16 | // GBuffer 17 | PackedTensorAccessor32<bool, 3> mask; 18 | PackedTensorAccessor32<float, 4> gb_pos; 19 | PackedTensorAccessor32<float, 4> gb_pos_grad; 20 | PackedTensorAccessor32<float, 4> gb_normal; 21 | PackedTensorAccessor32<float, 4> gb_normal_grad; 22 | PackedTensorAccessor32<float, 4> gb_view_pos; 23 | PackedTensorAccessor32<float, 4> gb_kd; 24 | PackedTensorAccessor32<float, 4> gb_kd_grad; 25 | PackedTensorAccessor32<float, 4> gb_ks; 26 | PackedTensorAccessor32<float, 4> gb_ks_grad; 27 | 28 | // Light 29 | PackedTensorAccessor32<float, 3> light; 30 | PackedTensorAccessor32<float, 3> light_grad; 31 | PackedTensorAccessor32<float, 2> pdf; // light pdf 32 | PackedTensorAccessor32<float, 1> rows; // light sampling cdf (marginal over rows) 33 | PackedTensorAccessor32<float, 2> cols; // light sampling cdf (per-row, over columns) 34 | 35 | // Output 36 | PackedTensorAccessor32<float, 4> diff; 37 | PackedTensorAccessor32<float, 4> diff_grad; 38 | PackedTensorAccessor32<float, 4> spec; 39 | PackedTensorAccessor32<float, 4> spec_grad; 40 | 41 | // Table with random permutations for stratified sampling 42 | PackedTensorAccessor32<int, 2> perms; 43 | 44 | OptixTraversableHandle handle; 45 | unsigned int BSDF; 46 | unsigned int n_samples_x; 47 | unsigned int rnd_seed; 48 | unsigned int backward; 49 | float shadow_scale; 50 | }; -------------------------------------------------------------------------------- /render/optixutils/c_src/optix_wrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include <optix.h> 12 | #include <string> 13 | 14 | //------------------------------------------------------------------------ 15 | // Python OptiX state wrapper. 16 | 17 | struct OptiXState 18 | { 19 | OptixDeviceContext context; 20 | OptixTraversableHandle gas_handle; 21 | CUdeviceptr d_gas_output_buffer; 22 | 23 | // Differentiable env sampling 24 | OptixPipeline pipelineEnvSampling; 25 | OptixShaderBindingTable sbtEnvSampling; 26 | OptixModule moduleEnvSampling; 27 | }; 28 | 29 | 30 | class OptiXStateWrapper 31 | { 32 | public: 33 | OptiXStateWrapper (const std::string &path, const std::string &cuda_path); 34 | ~OptiXStateWrapper (void); 35 | 36 | OptiXState* pState; 37 | }; 38 | 39 | -------------------------------------------------------------------------------- /render/optixutils/include/optix.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited.
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /// @file 23 | /// @author NVIDIA Corporation 24 | /// @brief OptiX public API header 25 | /// 26 | /// Includes the host api if compiling host code, includes the cuda api if compiling device code. 27 | /// For the math library routines include optix_math.h 28 | 29 | #ifndef __optix_optix_h__ 30 | #define __optix_optix_h__ 31 | 32 | /// The OptiX version. 33 | /// 34 | /// - major = OPTIX_VERSION/10000 35 | /// - minor = (OPTIX_VERSION%10000)/100 36 | /// - micro = OPTIX_VERSION%100 37 | #define OPTIX_VERSION 70300 38 | 39 | 40 | #ifdef __CUDACC__ 41 | #include "optix_device.h" 42 | #else 43 | #include "optix_host.h" 44 | #endif 45 | 46 | 47 | #endif // __optix_optix_h__ 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_device.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_device.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host/Device side 28 | */ 29 | 30 | /******************************************************************************\ 31 | * optix_cuda.h 32 | * 33 | * This file provides the nvcc interface for generating PTX that the OptiX is 34 | * capable of parsing and weaving into the final kernel. This is included by 35 | * optix.h automatically if compiling device code. It can be included explicitly 36 | * in host code if desired. 
37 | * 38 | \******************************************************************************/ 39 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 40 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 41 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 42 | #endif 43 | #include "optix_7_device.h" 44 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) 45 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 46 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 47 | #endif 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_function_table_definition.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /// @file 22 | /// @author NVIDIA Corporation 23 | /// @brief OptiX public API header 24 | 25 | #ifndef __optix_optix_function_table_definition_h__ 26 | #define __optix_optix_function_table_definition_h__ 27 | 28 | #include "optix_function_table.h" 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | /** \addtogroup optix_function_table 35 | @{ 36 | */ 37 | 38 | /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly 39 | /// one translation unit. This can be achieved by including this header file in that translation 40 | /// unit. 41 | OptixFunctionTable g_optixFunctionTable; 42 | 43 | /*@}*/ // end group optix_function_table 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | 49 | #endif // __optix_optix_function_table_definition_h__ 50 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_host.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_host.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host side 28 | */ 29 | 30 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 31 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 32 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 33 | #endif 34 | #include "optix_7_host.h" 35 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) 36 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 37 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 38 | #endif 39 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /** 22 | * @file optix_types.h 23 | * @author NVIDIA Corporation 24 | * @brief OptiX public API header 25 | * 26 | */ 27 | 28 | #ifndef __optix_optix_types_h__ 29 | #define __optix_optix_types_h__ 30 | 31 | // clang-format off 32 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 33 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 34 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 35 | #endif 36 | #include "optix_7_types.h" 37 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ ) 38 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 39 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 40 | #endif 41 | // clang-format on 42 | 43 | #endif // #ifndef __optix_optix_types_h__ 44 | -------------------------------------------------------------------------------- /render/optixutils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import os 11 | import sys 12 | import torch 13 | import torch.utils.cpp_extension 14 | 15 | #---------------------------------------------------------------------------- 16 | # C++/Cuda plugin compiler/loader. 17 | 18 | _plugin = None 19 | if _plugin is None: 20 | 21 | # Make sure we can find the necessary compiler and library binaries. 22 | if os.name == 'nt': 23 | optix_include_dir = os.path.dirname(__file__) + r"\include" 24 | 25 | def find_cl_path(): 26 | import glob 27 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']: 28 | vs_editions = glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) \ 29 | + glob.glob(r"C:\Program Files\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) 30 | paths = sorted(vs_editions, reverse=True) 31 | if paths: 32 | return paths[0] 33 | 34 | # If cl.exe is not on path, try to find it. 35 | if os.system("where cl.exe >nul 2>nul") != 0: 36 | cl_path = find_cl_path() 37 | if cl_path is None: 38 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 39 | os.environ['PATH'] += ';' + cl_path 40 | 41 | elif os.name == 'posix': 42 | optix_include_dir = os.path.dirname(__file__) + r"/include" 43 | 44 | include_paths = [optix_include_dir] 45 | 46 | # Compiler options. 47 | opts = ['-DNVDR_TORCH'] 48 | 49 | # Linker options. 50 | if os.name == 'posix': 51 | ldflags = ['-lcuda', '-lnvrtc'] 52 | elif os.name == 'nt': 53 | ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib'] 54 | 55 | # List of sources.
56 | source_files = [ 57 | 'c_src/denoising.cu', 58 | 'c_src/optix_wrapper.cpp', 59 | 'c_src/torch_bindings.cpp' 60 | ] 61 | 62 | # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine. 63 | os.environ['TORCH_CUDA_ARCH_LIST'] = '' 64 | 65 | # Compile and load. 66 | build_dir = os.path.join(os.path.dirname(__file__), 'build') 67 | os.makedirs(build_dir, exist_ok=True) 68 | source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files] 69 | torch.utils.cpp_extension.load(name='optixutils_plugin', sources=source_paths, extra_cflags=opts, 70 | build_directory=build_dir, 71 | extra_cuda_cflags=opts, extra_ldflags=ldflags, extra_include_paths=include_paths, with_cuda=True, verbose=True) 72 | 73 | # Import, cache, and return the compiled module. 74 | import optixutils_plugin 75 | _plugin = optixutils_plugin 76 | 77 | #---------------------------------------------------------------------------- 78 | # OptiX autograd func 79 | #---------------------------------------------------------------------------- 80 | 81 | class _optix_env_shade_func(torch.autograd.Function): 82 | _random_perm = {} 83 | 84 | @staticmethod 85 | def forward(ctx, optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF, n_samples_x, rnd_seed, shadow_scale): 86 | _rnd_seed = np.random.randint(2**31) if rnd_seed is None else rnd_seed 87 | if n_samples_x not in _optix_env_shade_func._random_perm: 88 | # Generate (32k) tables with random permutations to decorrelate the BSDF and light stratified samples 89 | _optix_env_shade_func._random_perm[n_samples_x] = torch.argsort(torch.rand(32768, n_samples_x * n_samples_x, device="cuda"), dim=-1).int() 90 | 91 | diff, spec = _plugin.env_shade_fwd(optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[n_samples_x], BSDF, n_samples_x, _rnd_seed, shadow_scale) 92 | ctx.save_for_backward(mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols) 93 | ctx.optix_ctx = optix_ctx 94 | ctx.BSDF = BSDF 95 | ctx.n_samples_x = n_samples_x 96 | ctx.rnd_seed = rnd_seed 97 | ctx.shadow_scale = shadow_scale 98 | return diff, spec 99 | 100 | @staticmethod 101 | def backward(ctx, diff_grad, spec_grad): 102 | optix_ctx = ctx.optix_ctx 103 | _rnd_seed = np.random.randint(2**31) if ctx.rnd_seed is None else ctx.rnd_seed 104 | mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols = ctx.saved_variables 105 | gb_pos_grad, gb_normal_grad, gb_kd_grad, gb_ks_grad, light_grad = _plugin.env_shade_bwd( 106 | optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[ctx.n_samples_x], 107 | ctx.BSDF, ctx.n_samples_x, _rnd_seed, ctx.shadow_scale, diff_grad, spec_grad) 108 | return None, None, None, gb_pos_grad, gb_normal_grad, None, gb_kd_grad, gb_ks_grad, light_grad, None, None, None, None, None, None, None 109 | 110 | class _bilateral_denoiser_func(torch.autograd.Function): 111 | @staticmethod 112 | def forward(ctx, col, nrm, zdz, sigma): 113 | ctx.save_for_backward(col, nrm, zdz) 114 | ctx.sigma = sigma 115 | out = _plugin.bilateral_denoiser_fwd(col, nrm, zdz, sigma) 116 | return out 117 | 118 | @staticmethod 119 | def backward(ctx, out_grad): 120 | col, nrm, zdz = ctx.saved_variables 121 | col_grad = _plugin.bilateral_denoiser_bwd(col, nrm, zdz, ctx.sigma, out_grad) 122 | return
col_grad, None, None, None 123 | 124 | #---------------------------------------------------------------------------- 125 | # OptiX ray tracing utils 126 | #---------------------------------------------------------------------------- 127 | 128 | class OptiXContext: 129 | def __init__(self): 130 | print("Cuda path", torch.utils.cpp_extension.CUDA_HOME) 131 | self.cpp_wrapper = _plugin.OptiXStateWrapper(os.path.dirname(__file__), torch.utils.cpp_extension.CUDA_HOME) 132 | 133 | def optix_build_bvh(optix_ctx, verts, tris, rebuild): 134 | ''' 135 | We deliberately do not raise on an empty triangle mesh here, since mSDF supervision can produce one; this should be cleaned up later. 136 | ''' 137 | # assert tris.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)" 138 | # assert verts.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)" 139 | _plugin.optix_build_bvh(optix_ctx.cpp_wrapper, verts.view(-1, 3), tris.view(-1, 3), rebuild) 140 | 141 | def optix_env_shade(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF='pbr', n_samples_x=8, rnd_seed=None, shadow_scale=1.0): 142 | iBSDF = ['pbr', 'diffuse', 'white'].index(BSDF) # Ordering important, must match the order of the fwd/bwdPbrBSDF kernel. 143 | return _optix_env_shade_func.apply(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, iBSDF, n_samples_x, rnd_seed, shadow_scale) 144 | 145 | def bilateral_denoiser(col, nrm, zdz, sigma): 146 | col_w = _bilateral_denoiser_func.apply(col, nrm, zdz, sigma) 147 | return col_w[..., 0:3] / col_w[..., 3:4] 148 | -------------------------------------------------------------------------------- /render/optixutils/tests/filter_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | 11 | 12 | import os 13 | import sys 14 | import math 15 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 16 | import optixutils as ou 17 | import numpy as np 18 | 19 | RES = 1024 20 | DTYPE = torch.float32 21 | 22 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 23 | return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN 24 | 25 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 26 | return x / length(x, eps) 27 | 28 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 29 | return torch.sum(x*y, -1, keepdim=True) 30 | 31 | class BilateralDenoiser(torch.nn.Module): 32 | def __init__(self, sigma=1.0): 33 | super(BilateralDenoiser, self).__init__() 34 | self.set_sigma(sigma) 35 | 36 | def set_sigma(self, sigma): 37 | self.sigma = max(sigma, 0.0001) 38 | self.variance = self.sigma**2.
39 | self.N = 2 * math.ceil(self.sigma * 2.5) + 1 40 | 41 | def forward(self, input): 42 | eps = 0.0001 43 | col = input[..., 0:3] 44 | nrm = input[..., 3:6] 45 | kd = input[..., 6:9] 46 | zdz = input[..., 9:11] 47 | 48 | accum_col = torch.zeros_like(col) 49 | accum_w = torch.zeros_like(col[..., 0:1]) 50 | for y in range(-self.N, self.N+1): 51 | for x in range(-self.N, self.N+1): 52 | 53 | ty, tx = torch.meshgrid(torch.arange(0, input.shape[1], dtype=torch.float32, device="cuda"), torch.arange(0, input.shape[2], dtype=torch.float32, device="cuda")) 54 | tx = tx[None, ..., None] + x 55 | ty = ty[None, ..., None] + y 56 | 57 | dist_sqr = (x**2 + y**2) 58 | dist = np.sqrt(dist_sqr) 59 | w_xy = np.exp(-dist_sqr / (2 * self.variance)) 60 | 61 | with torch.no_grad(): 62 | nrm_tap = torch.roll(nrm, (-y, -x), (1, 2)) 63 | w_normal = torch.pow(torch.clamp(dot(nrm_tap, nrm), min=eps, max=1.0), 128.0) # From SVGF 64 | 65 | zdz_tap = torch.roll(zdz, (-y, -x), (1, 2)) 66 | w_depth = torch.exp(-(torch.abs(zdz_tap[..., 0:1] - zdz[..., 0:1]) / torch.clamp(zdz[..., 1:2] * dist, min=eps)) ) # From SVGF 67 | 68 | w = w_xy * w_normal * w_depth 69 | w = torch.where((tx >= 0) & (tx < input.shape[2]) & (ty >= 0) & (ty < input.shape[1]), w, torch.zeros_like(w)) 70 | 71 | col_tap = torch.roll(col, (-y, -x), (1, 2)) 72 | accum_col += col_tap * w 73 | accum_w += w 74 | return accum_col / torch.clamp(accum_w, min=eps) 75 | 76 | def relative_loss(name, ref, cuda): 77 | ref = ref.float() 78 | cuda = cuda.float() 79 | denom = torch.where(ref > 1e-7, ref, torch.ones_like(ref)) 80 | relative = torch.abs(ref - cuda) / denom 81 | print(name, torch.max(relative).item()) 82 | 83 | 84 | def test_filter(): 85 | img_cuda = torch.rand(1, RES, RES, 11, dtype=DTYPE, device='cuda') 86 | img_cuda[..., 3:6] = safe_normalize(img_cuda[..., 3:6]) 87 | img_ref = img_cuda.clone().detach().requires_grad_(True) 88 | img_cuda = img_cuda.clone().detach().requires_grad_(True) 89 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 90 | target_ref = target_cuda.clone().detach().requires_grad_(True) 91 | 92 | SIGMA = 2.0 93 | 94 | start = torch.cuda.Event(enable_timing=True) 95 | end = torch.cuda.Event(enable_timing=True) 96 | 97 | start.record() 98 | denoiser = BilateralDenoiser(sigma=SIGMA) 99 | denoised_ref = denoiser.forward(img_ref) 100 | ref_loss = torch.nn.MSELoss()(denoised_ref, target_ref) 101 | ref_loss.backward() 102 | end.record() 103 | torch.cuda.synchronize() 104 | print("Python:", start.elapsed_time(end)) 105 | 106 | start.record() 107 | denoised_cuda = ou.bilateral_denoiser(img_cuda[..., 0:3], img_cuda[..., 3:6], img_cuda[..., 9:11], SIGMA) # the exported CUDA path takes (col, nrm, zdz, sigma); the kd channels (6:9) are not used as a guide 108 | cuda_loss = torch.nn.MSELoss()(denoised_cuda, target_cuda) 109 | cuda_loss.backward() 110 | end.record() 111 | torch.cuda.synchronize() 112 | print("CUDA:", start.elapsed_time(end)) 113 | 114 | print("-------------------------------------------------------------") 115 | print(" Filter loss:") 116 | print("-------------------------------------------------------------") 117 | 118 | relative_loss("denoised:", denoised_ref[..., 0:3], denoised_cuda[..., 0:3]) 119 | relative_loss("grad:", img_ref.grad[..., 0:3], img_cuda.grad[..., 0:3]) 120 | 121 | test_filter() -------------------------------------------------------------------------------- /render/regularizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import torch 11 | import nvdiffrast.torch as dr 12 | import torch.nn.functional as F 13 | 14 | from render import util 15 | from . import mesh 16 | 17 | def luma(x): 18 | return ((x[..., 0:1] + x[..., 1:2] + x[..., 2:3]) / 3).repeat(1, 1, 1, 3) 19 | def value(x): 20 | return torch.max(x[..., 0:3], dim=-1, keepdim=True)[0].repeat(1, 1, 1, 3) 21 | 22 | def chroma_loss(kd, color_ref, lambda_chroma): 23 | eps = 0.001 24 | ref_chroma = color_ref[..., 0:3] / torch.clip(value(color_ref), min=eps) 25 | opt_chroma = kd[..., 0:3] / torch.clip(value(kd), min=eps) 26 | return torch.mean(torch.abs((opt_chroma - ref_chroma) * color_ref[..., 3:])) * lambda_chroma 27 | 28 | # Diffuse luma regularizer + specular 29 | def shading_loss(diffuse_light, specular_light, color_ref, lambda_diffuse, lambda_specular): 30 | diffuse_luma = luma(diffuse_light) 31 | specular_luma = luma(specular_light) 32 | ref_luma = value(color_ref) 33 | 34 | eps = 0.001 35 | img = util.rgb_to_srgb(torch.log(torch.clamp((diffuse_luma + specular_luma) * color_ref[..., 3:], min=0, max=65535) + 1)) 36 | target = util.rgb_to_srgb(torch.log(torch.clamp(ref_luma * color_ref[..., 3:], min=0, max=65535) + 1)) 37 | # error = torch.abs(img - target) * diffuse_luma / torch.clamp(diffuse_luma + specular_luma, min=eps) ### encourage specular component to take control 38 | error = torch.abs(img - target) ### the original version in the paper 39 | loss = torch.mean(error) * lambda_diffuse 40 | loss += torch.mean(specular_luma) / torch.clamp(torch.mean(diffuse_luma), min=eps) * lambda_specular 41 | return loss 42 | 43 | ###################################################################################### 44 | # Material smoothness loss 45 | ###################################################################################### 46 | 47 | def material_smoothness_grad(kd_grad, ks_grad, nrm_grad, lambda_kd=0.25, lambda_ks=0.1, lambda_nrm=0.0): 48 | kd_luma_grad = (kd_grad[..., 0] + kd_grad[..., 1] + kd_grad[..., 2]) / 3 49 | loss = torch.mean(kd_luma_grad * kd_grad[..., -1]) * lambda_kd 50 | loss += torch.mean(ks_grad[..., :-1] * ks_grad[..., -1:]) * lambda_ks 51 | loss += torch.mean(nrm_grad[..., :-1] * nrm_grad[..., -1:]) * lambda_nrm 52 | return loss 53 | 54 | ###################################################################################### 55 | # Computes the image gradient, useful for kd/ks smoothness losses 56 | ###################################################################################### 57 | def image_grad(buf, std=0.01): 58 | t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / buf.shape[1], 1.0 - 1.0 / buf.shape[1], buf.shape[1], device="cuda"), 59 | torch.linspace(-1.0 + 1.0 / buf.shape[2], 1.0 - 1.0 / buf.shape[2], buf.shape[2], device="cuda"), 60 | indexing='ij') 61 | tc = torch.normal(mean=0, std=std, size=(buf.shape[0], buf.shape[1], buf.shape[2], 2), device="cuda") + torch.stack((s, t), dim=-1)[None, ...] 
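    # (sketch of the idea, matching the surrounding code) `tc` jitters every pixel's
    # sampling coordinate by N(0, std); re-sampling the buffer at the jittered location
    # and differencing against the original gives a stochastic estimate of the local
    # image gradient, masked by both alpha channels.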
62 | tap = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp') 63 | return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:] 64 | 65 | ###################################################################################### 66 | # Computes the average edge length of a mesh. 67 | # Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients 68 | ###################################################################################### 69 | def avg_edge_length(v_pos, t_pos_idx): 70 | e_pos_idx = mesh.compute_edges(t_pos_idx) 71 | edge_len = util.length(v_pos[e_pos_idx[:, 0]] - v_pos[e_pos_idx[:, 1]]) 72 | return torch.mean(edge_len) 73 | 74 | ###################################################################################### 75 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun). 76 | # https://mgarland.org/class/geom04/material/smoothing.pdf 77 | ###################################################################################### 78 | def laplace_regularizer_const(v_pos, t_pos_idx): 79 | term = torch.zeros_like(v_pos) 80 | norm = torch.zeros_like(v_pos[..., 0:1]) 81 | 82 | v0 = v_pos[t_pos_idx[:, 0], :] 83 | v1 = v_pos[t_pos_idx[:, 1], :] 84 | v2 = v_pos[t_pos_idx[:, 2], :] 85 | 86 | term.scatter_add_(0, t_pos_idx[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0)) 87 | term.scatter_add_(0, t_pos_idx[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1)) 88 | term.scatter_add_(0, t_pos_idx[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2)) 89 | 90 | two = torch.ones_like(v0) * 2.0 91 | norm.scatter_add_(0, t_pos_idx[:, 0:1], two) 92 | norm.scatter_add_(0, t_pos_idx[:, 1:2], two) 93 | norm.scatter_add_(0, t_pos_idx[:, 2:3], two) 94 | 95 | term = term / torch.clamp(norm, min=1.0) 96 | 97 | return torch.mean(term**2) 98 | 99 | ###################################################################################### 100 | # Smooth vertex normals 101 | ###################################################################################### 102 | def normal_consistency(v_pos, t_pos_idx): 103 | # Compute face normals 104 | v0 = v_pos[t_pos_idx[:, 0], :] 105 | v1 = v_pos[t_pos_idx[:, 1], :] 106 | v2 = v_pos[t_pos_idx[:, 2], :] 107 | 108 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0)) 109 | 110 | tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx) 111 | 112 | # Fetch normals for both faces sharing an edge 113 | n0 = face_normals[tris_per_edge[:, 0], :] 114 | n1 = face_normals[tris_per_edge[:, 1], :] 115 | 116 | # Compute error metric based on normal difference 117 | term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0) 118 | term = (1.0 - term) * 0.5 119 | 120 | return torch.mean(torch.abs(term)) 121 | 122 | def ssim_loss(pred, target): 123 | # Window size for the local Gaussian statistics 124 | window_size = 11 125 | # Normalized 2D Gaussian window, one copy per RGB channel (sigma=1.5 assumed, the common SSIM default) 126 | coords = torch.arange(window_size, dtype=pred.dtype, device=pred.device) - window_size // 2 127 | g = torch.exp(-coords**2 / (2 * 1.5**2)) 128 | g = g / g.sum() 129 | window = (g[:, None] * g[None, :]).expand(3, 1, window_size, window_size).contiguous() 130 | 131 | # Local means 132 | mu_x = F.conv2d(pred, window, padding=window_size//2, groups=3) 133 | mu_y = F.conv2d(target, window, padding=window_size//2, groups=3) 134 | 135 | # Local variances and covariance 136 | sigma_x = F.conv2d(pred**2, window, padding=window_size//2, groups=3) - mu_x**2 137 | sigma_y = F.conv2d(target**2, window, padding=window_size//2, groups=3) - mu_y**2 138 | sigma_xy = F.conv2d(pred*target, window, padding=window_size//2, groups=3) - mu_x*mu_y 139 | 140 | # Constants for numerical stability 141 | C1 = 0.01**2 142 | C2 = 0.03**2 143 | 144 | # SSIM map 145 | ssim = ((2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)) / ((mu_x**2 + mu_y**2 + C1) * (sigma_x + sigma_y + C2)) 146 | 147 | return 1 - ssim.mean() --------------------------------------------------------------------------------
/render/renderutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith 11 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ] 12 | -------------------------------------------------------------------------------- /render/renderutils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/bsdf.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/bsdf.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /render/renderutils/bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
9 | 10 | import math 11 | import torch 12 | 13 | NORMAL_THRESHOLD = 0.1 14 | 15 | ################################################################################ 16 | # Vector utility functions 17 | ################################################################################ 18 | 19 | def _dot(x, y): 20 | return torch.sum(x*y, -1, keepdim=True) 21 | 22 | def _reflect(x, n): 23 | return 2*_dot(x, n)*n - x 24 | 25 | def _safe_normalize(x): 26 | return torch.nn.functional.normalize(x, dim = -1) 27 | 28 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading): 29 | # Swap normal direction for backfacing surfaces 30 | if two_sided_shading: 31 | smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm) 32 | geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm) 33 | 34 | t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1) 35 | return torch.lerp(geom_nrm, smooth_nrm, t) 36 | 37 | 38 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl): 39 | smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm)) 40 | if opengl: 41 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 42 | else: 43 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 44 | return _safe_normalize(shading_nrm) 45 | 46 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 47 | smooth_nrm = _safe_normalize(smooth_nrm) 48 | smooth_tng = _safe_normalize(smooth_tng) 49 | view_vec = _safe_normalize(view_pos - pos) 50 | shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl) 51 | return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading) 52 | 53 | ################################################################################ 54 | # Simple lambertian diffuse BSDF 55 | ################################################################################ 56 | 57 | def bsdf_lambert(nrm, wi): 58 | return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi 59 | 60 | ################################################################################ 61 | # Frostbite diffuse 62 | ################################################################################ 63 | 64 | def bsdf_frostbite(nrm, wi, wo, linearRoughness): 65 | wiDotN = _dot(wi, nrm) 66 | woDotN = _dot(wo, nrm) 67 | 68 | h = _safe_normalize(wo + wi) 69 | wiDotH = _dot(wi, h) 70 | 71 | energyBias = 0.5 * linearRoughness 72 | energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness 73 | f90 = energyBias + 2.0 * wiDotH * wiDotH * linearRoughness 74 | f0 = 1.0 75 | 76 | wiScatter = bsdf_fresnel_shlick(f0, f90, wiDotN) 77 | woScatter = bsdf_fresnel_shlick(f0, f90, woDotN) 78 | res = wiScatter * woScatter * energyFactor 79 | return torch.where((wiDotN > 0.0) & (woDotN > 0.0), res, torch.zeros_like(res)) 80 | 81 | ################################################################################ 82 | # Phong specular, loosely based on mitsuba implementation 83 | ################################################################################ 84 | 85 | def bsdf_phong(nrm, wo, wi, N): 86 | dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0) 87 | dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0) 88 | return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi) 89 | 90 | 
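# Worked example (a sketch): at normal incidence, with n = wi = wo = (0, 0, 1),
#   bsdf_lambert(n, n)        -> 1/pi  (~0.3183)
#   bsdf_phong(n, n, n, 32.0) -> (32 + 2) / (2*pi)  (~5.411),
# since the reflection of wo about n coincides with wi, so dp_r = dp_l = 1.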
################################################################################ 91 | # PBR's implementation of GGX specular 92 | ################################################################################ 93 | 94 | specular_epsilon = 1e-4 95 | 96 | def bsdf_fresnel_shlick(f0, f90, cosTheta): 97 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 98 | return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0 99 | 100 | def bsdf_ndf_ggx(alphaSqr, cosTheta): 101 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 102 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1 103 | return alphaSqr / (d * d * math.pi) 104 | 105 | def bsdf_lambda_ggx(alphaSqr, cosTheta): 106 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 107 | cosThetaSqr = _cosTheta * _cosTheta 108 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr 109 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0) 110 | return res 111 | 112 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO): 113 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI) 114 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO) 115 | return 1 / (1 + lambdaI + lambdaO) 116 | 117 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08): 118 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0) 119 | alphaSqr = _alpha * _alpha 120 | 121 | h = _safe_normalize(wo + wi) 122 | woDotN = _dot(wo, nrm) 123 | wiDotN = _dot(wi, nrm) 124 | woDotH = _dot(wo, h) 125 | nDotH = _dot(nrm, h) 126 | 127 | D = bsdf_ndf_ggx(alphaSqr, nDotH) 128 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN) 129 | F = bsdf_fresnel_shlick(col, 1, woDotH) 130 | 131 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon) 132 | 133 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon) 134 | return torch.where(frontfacing, w, torch.zeros_like(w)) 135 | 136 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF): 137 | wo = _safe_normalize(view_pos - pos) 138 | wi = _safe_normalize(light_pos - pos) 139 | 140 | spec_str = arm[..., 0:1] # x component 141 | roughness = arm[..., 1:2] # y component 142 | metallic = arm[..., 2:3] # z component 143 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str) 144 | kd = kd * (1.0 - metallic) 145 | 146 | if BSDF == 0: 147 | diffuse = kd * bsdf_lambert(nrm, wi) 148 | else: 149 | diffuse = kd * bsdf_frostbite(nrm, wi, wo, roughness) 150 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness) 151 | return diffuse + specular 152 | -------------------------------------------------------------------------------- /render/renderutils/build/bsdf.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/bsdf.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem 
/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -DNVDR_TORCH 6 | post_cflags = 7 | cuda_cflags = -DTORCH_EXTENSION_NAME=renderutils_plugin -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/TH -isystem /home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/chh/miniconda3/envs/gshell/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -DNVDR_TORCH -std=c++14 8 | cuda_post_cflags = 9 | cuda_dlink_post_cflags = 10 | ldflags = -shared -lcuda -lnvrtc -L/home/chh/miniconda3/envs/gshell/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart 11 | 12 | rule compile 13 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 14 | depfile = $out.d 15 | deps = gcc 16 | 17 | rule cuda_compile 18 | depfile = $out.d 19 | deps = gcc 20 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 21 | 22 | 23 | 24 | rule link 25 | command = $cxx $in $ldflags -o $out 26 | 27 | build mesh.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/mesh.cu 28 | build loss.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/loss.cu 29 | build bsdf.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/bsdf.cu 30 | build normal.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/normal.cu 31 | build cubemap.cuda.o: cuda_compile /nas_data/chh/D3Human_main/render/renderutils/c_src/cubemap.cu 32 | build common.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/common.cpp 33 | build torch_bindings.o: compile /nas_data/chh/D3Human_main/render/renderutils/c_src/torch_bindings.cpp 34 | 35 | 36 | 37 | build renderutils_plugin.so: link mesh.cuda.o loss.cuda.o bsdf.cuda.o normal.cuda.o cubemap.cuda.o common.o torch_bindings.o 38 | 39 | default renderutils_plugin.so 40 | 41 | -------------------------------------------------------------------------------- /render/renderutils/build/common.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/common.o -------------------------------------------------------------------------------- /render/renderutils/build/cubemap.cuda.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/cubemap.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/loss.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/loss.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/mesh.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/mesh.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/normal.cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/normal.cuda.o -------------------------------------------------------------------------------- /render/renderutils/build/renderutils_plugin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/renderutils_plugin.so -------------------------------------------------------------------------------- /render/renderutils/build/torch_bindings.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/render/renderutils/build/torch_bindings.o -------------------------------------------------------------------------------- /render/renderutils/c_src/bsdf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct LambertKernelParams 17 | { 18 | Tensor nrm; 19 | Tensor wi; 20 | Tensor out; 21 | dim3 gridSize; 22 | }; 23 | 24 | struct FrostbiteDiffuseKernelParams 25 | { 26 | Tensor nrm; 27 | Tensor wi; 28 | Tensor wo; 29 | Tensor linearRoughness; 30 | Tensor out; 31 | dim3 gridSize; 32 | }; 33 | 34 | struct FresnelShlickKernelParams 35 | { 36 | Tensor f0; 37 | Tensor f90; 38 | Tensor cosTheta; 39 | Tensor out; 40 | dim3 gridSize; 41 | }; 42 | 43 | struct NdfGGXParams 44 | { 45 | Tensor alphaSqr; 46 | Tensor cosTheta; 47 | Tensor out; 48 | dim3 gridSize; 49 | }; 50 | 51 | struct MaskingSmithParams 52 | { 53 | Tensor alphaSqr; 54 | Tensor cosThetaI; 55 | Tensor cosThetaO; 56 | Tensor out; 57 | dim3 gridSize; 58 | }; 59 | 60 | struct PbrSpecular 61 | { 62 | Tensor col; 63 | Tensor nrm; 64 | Tensor wo; 65 | Tensor wi; 66 | Tensor alpha; 67 | Tensor out; 68 | dim3 gridSize; 69 | float min_roughness; 70 | }; 71 | 72 | struct PbrBSDF 73 | { 74 | Tensor kd; 75 | Tensor arm; 76 | Tensor pos; 77 | Tensor nrm; 78 | Tensor view_pos; 79 | Tensor light_pos; 80 | Tensor out; 81 | dim3 gridSize; 82 | float min_roughness; 83 | int BSDF; 84 | }; 85 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #include <cuda_runtime.h> 13 | #include <algorithm> 14 | 15 | //------------------------------------------------------------------------ 16 | // Block and grid size calculators for kernel launches. 17 | 18 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims) 19 | { 20 | int maxThreads = maxWidth * maxHeight; 21 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1) 22 | return dim3(1, 1, 1); // Degenerate. 23 | 24 | // Start from max size. 25 | int bw = maxWidth; 26 | int bh = maxHeight; 27 | 28 | // Optimizations for weirdly sized buffers. 29 | if (dims.x < bw) 30 | { 31 | // Decrease block width to smallest power of two that covers the buffer width. 32 | while ((bw >> 1) >= dims.x) 33 | bw >>= 1; 34 | 35 | // Maximize height. 36 | bh = maxThreads / bw; 37 | if (bh > dims.y) 38 | bh = dims.y; 39 | } 40 | else if (dims.y < bh) 41 | { 42 | // Halve height and double width until fits completely inside buffer vertically. 43 | while (bh > dims.y) 44 | { 45 | bh >>= 1; 46 | if (bw < dims.x) 47 | bw <<= 1; 48 | } 49 | } 50 | 51 | // Done. 52 | return dim3(bw, bh, 1); 53 | } 54 | 55 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g.
__shfl_xor_sync) 56 | dim3 getWarpSize(dim3 blockSize) 57 | { 58 | return dim3( 59 | std::min(blockSize.x, 32u), 60 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)), 61 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z)) 62 | ); 63 | } 64 | 65 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims) 66 | { 67 | dim3 gridSize; 68 | gridSize.x = (dims.x - 1) / blockSize.x + 1; 69 | gridSize.y = (dims.y - 1) / blockSize.y + 1; 70 | gridSize.z = (dims.z - 1) / blockSize.z + 1; 71 | return gridSize; 72 | } 73 | 74 | //------------------------------------------------------------------------ 75 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | #include <cuda.h> 14 | #include <stdint.h> 15 | 16 | #include "vec3f.h" 17 | #include "vec4f.h" 18 | #include "tensor.h" 19 | 20 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims); 21 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims); 22 | 23 | #ifdef __CUDACC__ 24 | 25 | #ifdef _MSC_VER 26 | #define M_PI 3.14159265358979323846f 27 | #endif 28 | 29 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize) 30 | { 31 | return dim3( 32 | min(blockSize.x, 32u), 33 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)), 34 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z)) 35 | ); 36 | } 37 | 38 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); } 39 | #else 40 | dim3 getWarpSize(dim3 blockSize); 41 | #endif -------------------------------------------------------------------------------- /render/renderutils/c_src/cubemap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited.
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct DiffuseCubemapKernelParams 17 | { 18 | Tensor cubemap; 19 | Tensor out; 20 | dim3 gridSize; 21 | }; 22 | 23 | struct SpecularCubemapKernelParams 24 | { 25 | Tensor cubemap; 26 | Tensor bounds; 27 | Tensor out; 28 | dim3 gridSize; 29 | float costheta_cutoff; 30 | float roughness; 31 | }; 32 | 33 | struct SpecularBoundsKernelParams 34 | { 35 | float costheta_cutoff; 36 | Tensor out; 37 | dim3 gridSize; 38 | }; 39 | -------------------------------------------------------------------------------- /render/renderutils/c_src/loss.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | enum TonemapperType 17 | { 18 | TONEMAPPER_NONE = 0, 19 | TONEMAPPER_LOG_SRGB = 1 20 | }; 21 | 22 | enum LossType 23 | { 24 | LOSS_L1 = 0, 25 | LOSS_MSE = 1, 26 | LOSS_RELMSE = 2, 27 | LOSS_SMAPE = 3 28 | }; 29 | 30 | struct LossKernelParams 31 | { 32 | Tensor img; 33 | Tensor target; 34 | Tensor out; 35 | dim3 gridSize; 36 | TonemapperType tonemapper; 37 | LossType loss; 38 | }; 39 | -------------------------------------------------------------------------------- /render/renderutils/c_src/mesh.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #include <cuda.h> 13 | #include <stdint.h> 14 | 15 | #include "common.h" 16 | #include "mesh.h" 17 | 18 | 19 | //------------------------------------------------------------------------ 20 | // Kernels 21 | 22 | __global__ void xfmPointsFwdKernel(XfmKernelParams p) 23 | { 24 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 25 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z; 26 | 27 | __shared__ float mtx[4][4]; 28 | if (threadIdx.x < 16) 29 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0)); 30 | __syncthreads(); 31 | 32 | if (px >= p.gridSize.x) 33 | return; 34 | 35 | vec3f pos( 36 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)), 37 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)), 38 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0)) 39 | ); 40 | 41 | if (p.isPoints) 42 | { 43 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]); 44 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]); 45 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]); 46 | p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]); 47 | } 48 | else 49 | { 50 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]); 51 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]); 52 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]); 53 | } 54 | } 55 | 56 | __global__ void xfmPointsBwdKernel(XfmKernelParams p) 57 | { 58 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 59 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z; 60 | 61 | __shared__ float mtx[4][4]; 62 | if (threadIdx.x < 16) 63 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0)); 64 | __syncthreads(); 65 | 66 | if (px >= p.gridSize.x) 67 | return; 68 | 69 | vec3f pos( 70 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)), 71 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)), 72 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0)) 73 | ); 74 | 75 | vec4f d_out( 76 | p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)), 77 | p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)), 78 | p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)), 79 | p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0)) 80 | ); 81 | 82 | if (p.isPoints) 83 | { 84 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]); 85 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]); 86 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]); 87 | } 88 | else 89 | { 90 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]); 91 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]); 92 | p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]); 93 | } 94 | }
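//------------------------------------------------------------------------
// Note on conventions: the shared-memory load above stores the transposed
// matrix, so the forward kernel computes out = matrix @ [x, y, z, 1] per
// point, and drops the translation terms for vectors. A minimal PyTorch
// sketch of the same transform (illustrative only; assumes points of
// shape [B, N, 3] and matrix of shape [B, 4, 4]):
//
//   out = torch.matmul(torch.nn.functional.pad(points, (0, 1), value=1.0),
//                      matrix.transpose(-2, -1))     # points  -> [B, N, 4]
//   out = torch.matmul(vectors, matrix[..., :3, :3].transpose(-2, -1))
//                                                    # vectors -> [B, N, 3]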
-------------------------------------------------------------------------------- /render/renderutils/c_src/mesh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct XfmKernelParams 17 | { 18 | bool isPoints; 19 | Tensor points; 20 | Tensor matrix; 21 | Tensor out; 22 | dim3 gridSize; 23 | }; 24 | -------------------------------------------------------------------------------- /render/renderutils/c_src/normal.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #include "common.h" 13 | #include "normal.h" 14 | 15 | #define NORMAL_THRESHOLD 0.1f 16 | 17 | //------------------------------------------------------------------------ 18 | // Perturb shading normal by tangent frame 19 | 20 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl) 21 | { 22 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 23 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 24 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 25 | return safeNormalize(_shading_nrm); 26 | } 27 | 28 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl) 29 | { 30 | //////////////////////////////////////////////////////////////////////// 31 | // FWD 32 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 33 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 34 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 35 | 36 | //////////////////////////////////////////////////////////////////////// 37 | // BWD 38 | vec3f d_shading_nrm(0); 39 | bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out); 40 | 41 | vec3f d_smooth_bitng(0); 42 | 43 | if (perturbed_nrm.z > 0.0f) 44 | { 45 | d_smooth_nrm += d_shading_nrm * perturbed_nrm.z; 46 | d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm); 47 | } 48 | 49 | d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y; 50 | d_perturbed_nrm.y += (opengl ? 
-1 : 1) * sum(d_shading_nrm * smooth_bitng); 51 | 52 | d_smooth_tng += d_shading_nrm * perturbed_nrm.x; 53 | d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng); 54 | 55 | vec3f d__smooth_bitng(0); 56 | bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng); 57 | 58 | bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng); 59 | } 60 | 61 | //------------------------------------------------------------------------ 62 | #define bent_nrm_eps 0.001f 63 | 64 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm) 65 | { 66 | float dp = dot(view_vec, smooth_nrm); 67 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 68 | return geom_nrm * (1.0f - t) + smooth_nrm * t; 69 | } 70 | 71 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out) 72 | { 73 | //////////////////////////////////////////////////////////////////////// 74 | // FWD 75 | float dp = dot(view_vec, smooth_nrm); 76 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 77 | 78 | //////////////////////////////////////////////////////////////////////// 79 | // BWD 80 | if (dp > NORMAL_THRESHOLD) 81 | d_smooth_nrm += d_out; 82 | else 83 | { 84 | // geom_nrm * (1.0f - t) + smooth_nrm * t; 85 | d_geom_nrm += d_out * (1.0f - t); 86 | d_smooth_nrm += d_out * t; 87 | float d_t = sum(d_out * (smooth_nrm - geom_nrm)); 88 | 89 | float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD; 90 | 91 | bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp); 92 | } 93 | } 94 | 95 | //------------------------------------------------------------------------ 96 | // Kernels 97 | 98 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p) 99 | { 100 | // Calculate pixel position. 101 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 102 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 103 | unsigned int pz = blockIdx.z; 104 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 105 | return; 106 | 107 | vec3f pos = p.pos.fetch3(px, py, pz); 108 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 109 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 110 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 111 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 112 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 113 | 114 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 115 | vec3f smooth_tng = safeNormalize(_smooth_tng); 116 | vec3f view_vec = safeNormalize(view_pos - pos); 117 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 118 | 119 | vec3f res; 120 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 121 | res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm); 122 | else 123 | res = fwdBendNormal(view_vec, shading_nrm, geom_nrm); 124 | 125 | p.out.store(px, py, pz, res); 126 | } 127 | 128 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p) 129 | { 130 | // Calculate pixel position. 
131 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 132 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 133 | unsigned int pz = blockIdx.z; 134 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 135 | return; 136 | 137 | vec3f pos = p.pos.fetch3(px, py, pz); 138 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 139 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 140 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 141 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 142 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 143 | vec3f d_out = p.out.fetch3(px, py, pz); 144 | 145 | /////////////////////////////////////////////////////////////////////////////////////////////////// 146 | // FWD 147 | 148 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 149 | vec3f smooth_tng = safeNormalize(_smooth_tng); 150 | vec3f _view_vec = view_pos - pos; 151 | vec3f view_vec = safeNormalize(view_pos - pos); 152 | 153 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 154 | 155 | /////////////////////////////////////////////////////////////////////////////////////////////////// 156 | // BWD 157 | 158 | vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0); 159 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 160 | { 161 | bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 162 | d_shading_nrm = -d_shading_nrm; 163 | d_geom_nrm = -d_geom_nrm; 164 | } 165 | else 166 | bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 167 | 168 | vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0); 169 | bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl); 170 | 171 | vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0); 172 | bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec); 173 | bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm); 174 | bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng); 175 | 176 | p.pos.store_grad(px, py, pz, -d__view_vec); 177 | p.view_pos.store_grad(px, py, pz, d__view_vec); 178 | p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm); 179 | p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm); 180 | p.smooth_tng.store_grad(px, py, pz, d__smooth_tng); 181 | p.geom_nrm.store_grad(px, py, pz, d_geom_nrm); 182 | } -------------------------------------------------------------------------------- /render/renderutils/c_src/normal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct PrepareShadingNormalKernelParams 17 | { 18 | Tensor pos; 19 | Tensor view_pos; 20 | Tensor perturbed_nrm; 21 | Tensor smooth_nrm; 22 | Tensor smooth_tng; 23 | Tensor geom_nrm; 24 | Tensor out; 25 | dim3 gridSize; 26 | bool two_sided_shading, opengl; 27 | }; 28 | -------------------------------------------------------------------------------- /render/renderutils/c_src/tensor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | #if defined(__CUDACC__) && defined(BFLOAT16) 14 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits 15 | #endif 16 | 17 | //--------------------------------------------------------------------------------- 18 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16 19 | 20 | struct Tensor 21 | { 22 | void* val; 23 | void* d_val; 24 | int dims[4], _dims[4]; 25 | int strides[4]; 26 | bool fp16; 27 | 28 | #if defined(__CUDA__) && !defined(__CUDA_ARCH__) 29 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {} 30 | #endif 31 | 32 | #ifdef __CUDACC__ 33 | // Helpers to index and read/write a single element 34 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; } 35 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); } 36 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; } 37 | #ifdef BFLOAT16 38 | __device__ inline float fetch(unsigned int idx) const { return fp16 ?
__bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; } 39 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; } 40 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; } 41 | #else 42 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; } 43 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; } 44 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; } 45 | #endif 46 | 47 | ////////////////////////////////////////////////////////////////////////////////////////// 48 | // Fetch, use broadcasting for tensor dimensions of size 1 49 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const 50 | { 51 | return fetch(nhwcIndex(z, y, x, 0)); 52 | } 53 | 54 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const 55 | { 56 | return vec3f( 57 | fetch(nhwcIndex(z, y, x, 0)), 58 | fetch(nhwcIndex(z, y, x, 1)), 59 | fetch(nhwcIndex(z, y, x, 2)) 60 | ); 61 | } 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 64 | // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside 65 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val) 66 | { 67 | store(_nhwcIndex(z, y, x, 0), _val); 68 | } 69 | 70 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 71 | { 72 | store(_nhwcIndex(z, y, x, 0), _val.x); 73 | store(_nhwcIndex(z, y, x, 1), _val.y); 74 | store(_nhwcIndex(z, y, x, 2), _val.z); 75 | } 76 | 77 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 78 | // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside 79 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val) 80 | { 81 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val); 82 | } 83 | 84 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 85 | { 86 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x); 87 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y); 88 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z); 89 | } 90 | #endif 91 | 92 | }; 93 | -------------------------------------------------------------------------------- /render/renderutils/c_src/vec3f.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | 14 | struct vec3f 15 | { 16 | float x, y, z; 17 | 18 | #ifdef __CUDACC__ 19 | __device__ vec3f() { } 20 | __device__ vec3f(float v) { x = v; y = v; z = v; } 21 | __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 22 | __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; } 23 | 24 | __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; } 25 | __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; } 26 | __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; } 27 | __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; } 28 | #endif 29 | }; 30 | 31 | #ifdef __CUDACC__ 32 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } 33 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } 34 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } 35 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); } 36 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); } 37 | 38 | __device__ static inline float sum(vec3f a) 39 | { 40 | return a.x + a.y + a.z; 41 | } 42 | 43 | __device__ static inline vec3f cross(vec3f a, vec3f b) 44 | { 45 | vec3f out; 46 | out.x = a.y * b.z - a.z * b.y; 47 | out.y = a.z * b.x - a.x * b.z; 48 | out.z = a.x * b.y - a.y * b.x; 49 | return out; 50 | } 51 | 52 | __device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out) 53 | { 54 | d_a.x += d_out.z * b.y - d_out.y * b.z; 55 | d_a.y += d_out.x * b.z - d_out.z * b.x; 56 | d_a.z += d_out.y * b.x - d_out.x * b.y; 57 | 58 | d_b.x += d_out.y * a.z - d_out.z * a.y; 59 | d_b.y += d_out.z * a.x - d_out.x * a.z; 60 | d_b.z += d_out.x * a.y - d_out.y * a.x; 61 | } 62 | 63 | __device__ static inline float dot(vec3f a, vec3f b) 64 | { 65 | return a.x * b.x + a.y * b.y + a.z * b.z; 66 | } 67 | 68 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out) 69 | { 70 | d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z; 71 | d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z; 72 | } 73 | 74 | __device__ static inline vec3f reflect(vec3f x, vec3f n) 75 | { 76 | return n * 2.0f * dot(n, x) - x; 77 | } 78 | 79 | __device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out) 80 | { 81 | d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z); 82 | d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z); 83 | d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1); 84 | 85 | d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x); 86 | d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y); 87 | d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z)); 88 | } 89 | 90 | __device__ static inline vec3f safeNormalize(vec3f v) 91 | { 92 | float l = sqrtf(v.x * v.x + 
v.y * v.y + v.z * v.z); 93 | return l > 0.0f ? (v / l) : vec3f(0.0f); 94 | } 95 | 96 | __device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out) 97 | { 98 | 99 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 100 | if (l > 0.0f) 101 | { 102 | float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f); 103 | d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac; 104 | d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac; 105 | d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac; 106 | } 107 | } 108 | 109 | #endif -------------------------------------------------------------------------------- /render/renderutils/c_src/vec4f.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | struct vec4f 15 | { 16 | float x, y, z, w; 17 | 18 | #ifdef __CUDACC__ 19 | __device__ vec4f() { } 20 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; } 21 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } 22 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; } 23 | #endif 24 | }; 25 | 26 | -------------------------------------------------------------------------------- /render/renderutils/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
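#
# These are the image losses dispatched by ru.image_loss(): an optional
# exposure + log + sRGB tonemap for HDR inputs, followed by L1/MSE or the
# outlier-robust SMAPE/RELMSE ratio losses. A hedged usage sketch, with
# images shaped [minibatch, H, W, 3] as in tests/test_loss.py:
#
#   loss = image_loss_fn(img, target, loss='smape', tonemapper='none')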
9 | 10 | import torch 11 | 12 | #---------------------------------------------------------------------------- 13 | # HDR image losses 14 | #---------------------------------------------------------------------------- 15 | 16 | def _tonemap_srgb(f, exposure=5): 17 | f = f * exposure 18 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 19 | 20 | def _SMAPE(img, target, eps=0.01): 21 | nom = torch.abs(img - target) 22 | denom = torch.abs(img) + torch.abs(target) + eps 23 | return torch.mean(nom / denom) 24 | 25 | def _RELMSE(img, target, eps=0.1): 26 | nom = (img - target) * (img - target) 27 | denom = img * img + target * target + eps 28 | return torch.mean(nom / denom) 29 | 30 | def image_loss_fn(img, target, loss, tonemapper): 31 | if tonemapper == 'log_srgb': 32 | img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1)) 33 | target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1)) 34 | 35 | if loss == 'mse': 36 | return torch.nn.functional.mse_loss(img, target) 37 | elif loss == 'smape': 38 | return _SMAPE(img, target) 39 | elif loss == 'relmse': 40 | return _RELMSE(img, target) 41 | else: 42 | return torch.nn.functional.l1_loss(img, target) 43 | -------------------------------------------------------------------------------- /render/renderutils/tests/test_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited.
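#
# Each case below runs the fused CUDA loss and the pure-PyTorch reference
# (use_python=True) on identical random inputs, then prints the maximum
# relative error of the loss value and of both input gradients. For fp32
# inputs these are expected, though not asserted here, to agree to roughly
# 1e-6.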
9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 8 18 | DTYPE = torch.float32 19 | 20 | def tonemap_srgb(f): 21 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 22 | 23 | def l1(output, target): 24 | x = torch.clamp(output, min=0, max=65535) 25 | r = torch.clamp(target, min=0, max=65535) 26 | x = tonemap_srgb(torch.log(x + 1)) 27 | r = tonemap_srgb(torch.log(r + 1)) 28 | return torch.nn.functional.l1_loss(x,r) 29 | 30 | def relative_loss(name, ref, cuda): 31 | ref = ref.float() 32 | cuda = cuda.float() 33 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 34 | 35 | def test_loss(loss, tonemapper): 36 | img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | img_ref = img_cuda.clone().detach().requires_grad_(True) 38 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 39 | target_ref = target_cuda.clone().detach().requires_grad_(True) 40 | 41 | ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True) 42 | ref_loss.backward() 43 | 44 | cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper) 45 | cuda_loss.backward() 46 | 47 | print("-------------------------------------------------------------") 48 | print(" Loss: %s, %s" % (loss, tonemapper)) 49 | print("-------------------------------------------------------------") 50 | 51 | relative_loss("res:", ref_loss, cuda_loss) 52 | relative_loss("img:", img_ref.grad, img_cuda.grad) 53 | relative_loss("target:", target_ref.grad, target_cuda.grad) 54 | 55 | 56 | test_loss('l1', 'none') 57 | test_loss('l1', 'log_srgb') 58 | test_loss('mse', 'log_srgb') 59 | test_loss('smape', 'none') 60 | test_loss('relmse', 'none') 61 | test_loss('mse', 'none') -------------------------------------------------------------------------------- /render/renderutils/tests/test_mesh.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | BATCH = 8 18 | RES = 1024 19 | DTYPE = torch.float32 20 | 21 | torch.manual_seed(0) 22 | 23 | def tonemap_srgb(f): 24 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 25 | 26 | def l1(output, target): 27 | x = torch.clamp(output, min=0, max=65535) 28 | r = torch.clamp(target, min=0, max=65535) 29 | x = tonemap_srgb(torch.log(x + 1)) 30 | r = tonemap_srgb(torch.log(r + 1)) 31 | return torch.nn.functional.l1_loss(x,r) 32 | 33 | def relative_loss(name, ref, cuda): 34 | ref = ref.float() 35 | cuda = cuda.float() 36 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref)).item()) 37 | 38 | def test_xfm_points(): 39 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 40 | points_ref = points_cuda.clone().detach().requires_grad_(True) 41 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 42 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 43 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 44 | 45 | ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True) 46 | ref_loss = torch.nn.MSELoss()(ref_out, target) 47 | ref_loss.backward() 48 | 49 | cuda_out = ru.xfm_points(points_cuda, mtx_cuda) 50 | cuda_loss = torch.nn.MSELoss()(cuda_out, target) 51 | cuda_loss.backward() 52 | 53 | print("-------------------------------------------------------------") 54 | 55 | relative_loss("res:", ref_out, cuda_out) 56 | relative_loss("points:", points_ref.grad, points_cuda.grad) 57 | 58 | def test_xfm_vectors(): 59 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 60 | points_ref = points_cuda.clone().detach().requires_grad_(True) 61 | points_cuda_p = points_cuda.clone().detach().requires_grad_(True) 62 | points_ref_p = points_cuda.clone().detach().requires_grad_(True) 63 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 64 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 65 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 66 | 67 | ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True) 68 | ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3]) 69 | ref_loss.backward() 70 | 71 | cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda) 72 | cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3]) 73 | cuda_loss.backward() 74 | 75 | ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True) 76 | ref_loss_p = torch.nn.MSELoss()(ref_out_p, target) 77 | ref_loss_p.backward() 78 | 79 | cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda) 80 | cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target) 81 | cuda_loss_p.backward() 82 | 83 | print("-------------------------------------------------------------") 84 | 85 | relative_loss("res:", ref_out, cuda_out) 86 | relative_loss("points:", points_ref.grad, points_cuda.grad) 87 | relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad) 88 | 89 | test_xfm_points() 90 | test_xfm_vectors() 91 | -------------------------------------------------------------------------------- /render/renderutils/tests/test_perf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | DTYPE=torch.float32 18 | 19 | def test_bsdf(BATCH, RES, ITR): 20 | kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 21 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 22 | arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 23 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 24 | pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 25 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 26 | nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 27 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 28 | view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 29 | view_ref = view_cuda.clone().detach().requires_grad_(True) 30 | light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 31 | light_ref = light_cuda.clone().detach().requires_grad_(True) 32 | target = torch.rand(BATCH, RES, RES, 3, device='cuda') 33 | 34 | start = torch.cuda.Event(enable_timing=True) 35 | end = torch.cuda.Event(enable_timing=True) 36 | 37 | ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 38 | 39 | print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES)) 40 | 41 | start.record() 42 | for i in range(ITR): 43 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 44 | end.record() 45 | torch.cuda.synchronize() 46 | print("Pbr BSDF python:", start.elapsed_time(end)) 47 | 48 | start.record() 49 | for i in range(ITR): 50 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 51 | end.record() 52 | torch.cuda.synchronize() 53 | print("Pbr BSDF cuda:", start.elapsed_time(end)) 54 | 55 | test_bsdf(1, 512, 1000) 56 | test_bsdf(16, 512, 1000) 57 | test_bsdf(1, 2048, 1000) 58 | -------------------------------------------------------------------------------- /script/__pycache__/connet_face_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/connet_face_head.cpython-38.pyc -------------------------------------------------------------------------------- /script/__pycache__/get_tet_smpl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/get_tet_smpl.cpython-38.pyc -------------------------------------------------------------------------------- /script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/script/__pycache__/process_body_cloth_head_msdfcut.cpython-38.pyc -------------------------------------------------------------------------------- /script/get_tet_smpl.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import pyvista as pv 4 | import pytetwild 5 | 6 | import numpy as np 7 | import tetgen 8 | 9 | def get_tet_mesh(mesh_path, save_npz_path): 10 | 11 | surface_mesh = pv.read(mesh_path) 12 | tet = tetgen.TetGen(surface_mesh) 13 | tet.make_manifold(verbose=True) 14 | tet_grid_volume=6e-3 15 | vertices, indices = tet.tetrahedralize( fixedvolume=1, 16 | maxvolume=tet_grid_volume, 17 | regionattrib=1, 18 | nobisect=False, steinerleft=-1, order=1, metric=1, meditview=1, nonodewritten=0, verbose=2) 19 | # shell = tet.grid.extract_surface() 20 | # tet_path = "tet_smpl2.obj" 21 | # vertices = vertices.to(np.float64) 22 | vertices = vertices.astype(np.float32) 23 | tet_path = save_npz_path.replace("npz", "obj") 24 | save_tet_mesh_as_obj(vertices, indices, tet_path) 25 | np.savez(save_npz_path, v=vertices, f=indices) 26 | 27 | return vertices, indices 28 | 29 | 30 | def get_tet_mesh_test(mesh_path, save_npz_path): 31 | 32 | surface_mesh = pv.read(mesh_path) 33 | tetrahedral_mesh = pytetwild.tetrahedralize_pv(surface_mesh, edge_length_fac=0.1) 34 | tetrahedral_mesh.explode(1).plot(show_edges=True) 35 | 36 | v = tetrahedral_mesh.points 37 | f = tetrahedral_mesh.cells.reshape(-1, 5)[:, 1:] 38 | 39 | np.savez(save_npz_path, v=v, f=f) 40 | 41 | return v, f 42 | 43 | 44 | def save_tet_mesh_as_obj(vertices, tetrahedra, filename): 45 | with open(filename, 'w') as f: 46 | for vertex in vertices: 47 | f.write(f"v {vertex[0]} {vertex[1]} {vertex[2]}\n") 48 | 49 | if tetrahedra is not None: 50 | for tet in tetrahedra: 51 | f.write(f"f {tet[0]+1} {tet[1]+1} {tet[2]+1} {tet[3]+1}\n") 52 | 53 | -------------------------------------------------------------------------------- /ssim_loss.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | from math import exp 16 | 17 | def l1_loss(network_output, gt): 18 | return torch.abs((network_output - gt)).mean() 19 | 20 | def l2_loss(network_output, gt): 21 | return ((network_output - gt) ** 2).mean() 22 | 23 | def gaussian(window_size, sigma): 24 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 25 | return gauss / gauss.sum() 26 | 27 | def create_window(window_size, channel): 28 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 29 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 30 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 31 | return window 32 | 33 | def ssim(img1, img2, window_size=11, size_average=True): 34 | channel = img1.size(-3) 35 | window = create_window(window_size, channel) 36 | 37 | if img1.is_cuda: 38 | window = window.cuda(img1.get_device()) 39 | window = window.type_as(img1) 40 | 41 | return _ssim(img1, img2, window, window_size, channel, size_average) 42 | 43 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 44 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 45 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 46 | 47 | mu1_sq = mu1.pow(2) 48 | mu2_sq = mu2.pow(2) 49 | mu1_mu2 = mu1 * mu2 50 | 51 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 52 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 53 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 54 | 55 | C1 = 0.01 ** 2 56 | C2 = 0.03 ** 2 57 | 58 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 59 | 60 | if size_average: 61 | return ssim_map.mean() 62 | else: 63 | return ssim_map.mean(1).mean(1).mean(1) 64 | 65 | -------------------------------------------------------------------------------- /third_parties/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__init__.py -------------------------------------------------------------------------------- /third_parties/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/lpips/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import torch 8 | # from torch.autograd import Variable 9 | 10 | from lpips.trainer import * 11 | from lpips.lpips import * 12 | 13 | def normalize_tensor(in_feat,eps=1e-10): 14 | norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True)) 15 | return in_feat/(norm_factor+eps) 16 | 17 | def l2(p0, p1, range=255.): 18 | return .5*np.mean((p0 / range - p1 / range)**2) 19 | 20 | def psnr(p0, p1, peak=255.): 21 | return 
10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2)) 22 | 23 | def dssim(p0, p1, range=255.): 24 | from skimage.metrics import structural_similarity 25 | return (1 - structural_similarity(p0, p1, data_range=range, channel_axis=-1)) / 2.  # replaces the removed skimage.measure.compare_ssim API (channel_axis needs skimage >= 0.19) 26 | 27 | def rgb2lab(in_img,mean_cent=False): 28 | from skimage import color 29 | img_lab = color.rgb2lab(in_img) 30 | if(mean_cent): 31 | img_lab[:,:,0] = img_lab[:,:,0]-50 32 | return img_lab 33 | 34 | def tensor2np(tensor_obj): 35 | # change dimension of a tensor object into a numpy array 36 | return tensor_obj[0].cpu().float().numpy().transpose((1,2,0)) 37 | 38 | def np2tensor(np_obj): 39 | # change dimenion of np array into tensor array 40 | return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 41 | 42 | def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False): 43 | # image tensor to lab tensor 44 | from skimage import color 45 | 46 | img = tensor2im(image_tensor) 47 | img_lab = color.rgb2lab(img) 48 | if(mc_only): 49 | img_lab[:,:,0] = img_lab[:,:,0]-50 50 | if(to_norm and not mc_only): 51 | img_lab[:,:,0] = img_lab[:,:,0]-50 52 | img_lab = img_lab/100. 53 | 54 | return np2tensor(img_lab) 55 | 56 | def tensorlab2tensor(lab_tensor,return_inbnd=False): 57 | from skimage import color 58 | import warnings 59 | warnings.filterwarnings("ignore") 60 | 61 | lab = tensor2np(lab_tensor)*100. 62 | lab[:,:,0] = lab[:,:,0]+50 63 | 64 | rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1) 65 | if(return_inbnd): 66 | # convert back to lab, see if we match 67 | lab_back = color.rgb2lab(rgb_back.astype('uint8')) 68 | mask = 1.*np.isclose(lab_back,lab,atol=2.) 69 | mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis]) 70 | return (im2tensor(rgb_back),mask) 71 | else: 72 | return im2tensor(rgb_back) 73 | 74 | def load_image(path): 75 | if(path[-3:] == 'dng'): 76 | import rawpy 77 | with rawpy.imread(path) as raw: 78 | img = raw.postprocess() 79 | elif(path[-3:]=='bmp' or path[-3:]=='jpg' or path[-3:]=='png' or path[-4:]=='jpeg'): 80 | import cv2 81 | return cv2.imread(path)[:,:,::-1] 82 | else: 83 | import matplotlib.pyplot as plt; img = (255*plt.imread(path)[:,:,:3]).astype('uint8')  # plt was previously used without being imported 84 | 85 | return img 86 | 87 | def rgb2lab(input): 88 | from skimage import color 89 | return color.rgb2lab(input / 255.) 90 | 91 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): 92 | image_numpy = image_tensor[0].cpu().float().numpy() 93 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor 94 | return image_numpy.astype(imtype) 95 | 96 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): 97 | return torch.Tensor((image / factor - cent) 98 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 99 | 100 | def tensor2vec(vector_tensor): 101 | return vector_tensor.data.cpu().numpy()[:, :, 0, 0] 102 | 103 | 104 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): 105 | # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.): 106 | image_numpy = image_tensor[0].cpu().float().numpy() 107 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor 108 | return image_numpy.astype(imtype) 109 | 110 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): 111 | # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.): 112 | return torch.Tensor((image / factor - cent) 113 | [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) 114 | 115 | 116 | 117 | def voc_ap(rec, prec, use_07_metric=False): 118 | """ ap = voc_ap(rec, prec, [use_07_metric]) 119 | Compute VOC AP given precision and recall. 120 | If use_07_metric is true, uses the 121 | VOC 07 11 point method (default:False). 122 | """ 123 | if use_07_metric: 124 | # 11 point metric 125 | ap = 0. 126 | for t in np.arange(0., 1.1, 0.1): 127 | if np.sum(rec >= t) == 0: 128 | p = 0 129 | else: 130 | p = np.max(prec[rec >= t]) 131 | ap = ap + p / 11. 132 | else: 133 | # correct AP calculation 134 | # first append sentinel values at the end 135 | mrec = np.concatenate(([0.], rec, [1.])) 136 | mpre = np.concatenate(([0.], prec, [0.])) 137 | 138 | # compute the precision envelope 139 | for i in range(mpre.size - 1, 0, -1): 140 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 141 | 142 | # to calculate area under PR curve, look for points 143 | # where X axis (recall) changes value 144 | i = np.where(mrec[1:] != mrec[:-1])[0] 145 | 146 | # and sum (\Delta recall) * prec 147 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 148 | return ap 149 | 150 |
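A quick numeric check of the two AP branches above, assuming the `voc_ap` from this listing is in scope (numbers hand-computed for this tiny curve):

```python
import numpy as np

# Tiny precision/recall curve: 4 ranked detections, 2 of them correct.
rec  = np.array([0.5, 0.5, 1.0, 1.0])
prec = np.array([1.0, 0.5, 0.66, 0.5])

# Envelope AP integrates the right-to-left precision envelope over recall steps:
# 0.5 * 1.0 + 0.5 * 0.66 = 0.83
print(voc_ap(rec, prec, use_07_metric=False))   # 0.83

# 11-point AP averages max precision at recall thresholds 0.0, 0.1, ..., 1.0:
# (6 * 1.0 + 5 * 0.66) / 11 ~= 0.845
print(voc_ap(rec, prec, use_07_metric=True))
```

The two variants disagree slightly by construction; the envelope form is the one used for "correct AP calculation", as the comment in the source notes.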
-------------------------------------------------------------------------------- /third_parties/lpips/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/lpips/pretrained_networks.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import torch 3 | from torchvision import models as tv 4 | 5 | class squeezenet(torch.nn.Module): 6 | def __init__(self, requires_grad=False, pretrained=True): 7 | super(squeezenet, self).__init__() 8 | pretrained_features = tv.squeezenet1_1(weights=tv.SqueezeNet1_1_Weights.DEFAULT if pretrained else None).features  # weights API, matching alexnet/vgg16 below 9 | self.slice1 = torch.nn.Sequential() 10 | self.slice2 = torch.nn.Sequential() 11 | self.slice3 = torch.nn.Sequential() 12 | self.slice4 = torch.nn.Sequential() 13 | self.slice5 = torch.nn.Sequential() 14 | self.slice6 = torch.nn.Sequential() 15 | self.slice7 = torch.nn.Sequential() 16 | self.N_slices = 7 17 | for x in range(2): 18 | self.slice1.add_module(str(x), pretrained_features[x]) 19 | for x in range(2,5): 20 | self.slice2.add_module(str(x), pretrained_features[x]) 21 | for x in range(5, 8): 22 | self.slice3.add_module(str(x), pretrained_features[x]) 23 | for x in range(8, 10): 24 | self.slice4.add_module(str(x), pretrained_features[x]) 25 | for x in range(10, 11): 26 | self.slice5.add_module(str(x), pretrained_features[x]) 27 | for x in range(11, 12): 28 | self.slice6.add_module(str(x), pretrained_features[x]) 29 | for x in range(12, 13): 30 | self.slice7.add_module(str(x), pretrained_features[x]) 31 | if not requires_grad: 32 | for param in self.parameters(): 33 | param.requires_grad = False 34 | 35 | def forward(self, X): 36 | h = self.slice1(X) 37 | h_relu1 = h 38 | h = self.slice2(h) 39 | h_relu2 = h 40 | h = self.slice3(h) 41 | h_relu3 = h 42 | h = self.slice4(h) 43 | h_relu4 = h 44 | h = self.slice5(h) 45 | h_relu5 = h 46 | h = self.slice6(h) 47 | h_relu6 = h 48 | h = self.slice7(h) 49 | h_relu7 = h 50 | vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7']) 51 | out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7) 52 | 53 | return out 54 | 55 |
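The slice wrappers above exist so LPIPS can read activations at several depths in a single forward pass. A minimal shape check (a sketch only; the import path assumes this repository's layout and random weights suffice):

```python
import torch
from third_parties.lpips.pretrained_networks import squeezenet  # path assumed from this repo

net = squeezenet(pretrained=False).eval()   # random weights are fine for a shape check
x = torch.randn(1, 3, 64, 64)
outs = net(x)                               # namedtuple with N_slices = 7 activations
for name, h in zip(outs._fields, outs):
    print(name, tuple(h.shape))             # spatial size shrinks slice by slice
```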
56 | class alexnet(torch.nn.Module): 57 | def __init__(self, requires_grad=False, pretrained=True): 58 | super(alexnet, self).__init__() 59 | weights = tv.AlexNet_Weights.DEFAULT if pretrained else None 60 | alexnet_pretrained_features = tv.alexnet(weights=weights).features 61 | self.slice1 = torch.nn.Sequential() 62 | self.slice2 = torch.nn.Sequential() 63 | self.slice3 = torch.nn.Sequential() 64 | self.slice4 = torch.nn.Sequential() 65 | self.slice5 = torch.nn.Sequential() 66 | self.N_slices = 5 67 | for x in range(2): 68 | self.slice1.add_module(str(x), alexnet_pretrained_features[x]) 69 | for x in range(2, 5): 70 | self.slice2.add_module(str(x), alexnet_pretrained_features[x]) 71 | for x in range(5, 8): 72 | self.slice3.add_module(str(x), alexnet_pretrained_features[x]) 73 | for x in range(8, 10): 74 | self.slice4.add_module(str(x), alexnet_pretrained_features[x]) 75 | for x in range(10, 12): 76 | self.slice5.add_module(str(x), alexnet_pretrained_features[x]) 77 | if not requires_grad: 78 | for param in self.parameters(): 79 | param.requires_grad = False 80 | 81 | def forward(self, X): 82 | h = self.slice1(X) 83 | h_relu1 = h 84 | h = self.slice2(h) 85 | h_relu2 = h 86 | h = self.slice3(h) 87 | h_relu3 = h 88 | h = self.slice4(h) 89 | h_relu4 = h 90 | h = self.slice5(h) 91 | h_relu5 = h 92 | alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5']) 93 | out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5) 94 | 95 | return out 96 | 97 | class vgg16(torch.nn.Module): 98 | def __init__(self, requires_grad=False, pretrained=True): 99 | super(vgg16, self).__init__() 100 | weights = tv.VGG16_Weights.DEFAULT if pretrained else None 101 | vgg_pretrained_features = tv.vgg16(weights=weights).features 102 | self.slice1 = torch.nn.Sequential() 103 | self.slice2 = torch.nn.Sequential() 104 | self.slice3 = torch.nn.Sequential() 105 | self.slice4 = torch.nn.Sequential() 106 | self.slice5 = torch.nn.Sequential() 107 | self.N_slices = 5 108 | for x in range(4): 109 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 110 | for x in range(4, 9): 111 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 112 | for x in range(9, 16): 113 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 114 | for x in range(16, 23): 115 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 116 | for x in range(23, 30): 117 | self.slice5.add_module(str(x), vgg_pretrained_features[x]) 118 | if not requires_grad: 119 | for param in self.parameters(): 120 | param.requires_grad = False 121 | 122 | def forward(self, X): 123 | h = self.slice1(X) 124 | h_relu1_2 = h 125 | h = self.slice2(h) 126 | h_relu2_2 = h 127 | h = self.slice3(h) 128 | h_relu3_3 = h 129 | h = self.slice4(h) 130 | h_relu4_3 = h 131 | h = self.slice5(h) 132 | h_relu5_3 = h 133 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']) 134 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3) 135 | 136 | return out 137 | 138 | 139 | 140 | class resnet(torch.nn.Module): 141 | def __init__(self, requires_grad=False, pretrained=True, num=18): 142 | super(resnet, self).__init__() 143 | if(num==18): 144 | self.net = tv.resnet18(weights=tv.ResNet18_Weights.DEFAULT if pretrained else None) 145 | elif(num==34): 146 | self.net = tv.resnet34(weights=tv.ResNet34_Weights.DEFAULT if pretrained else None) 147 | elif(num==50): 148 | self.net = tv.resnet50(weights=tv.ResNet50_Weights.DEFAULT if pretrained else None) 149 | elif(num==101): 150 | self.net = tv.resnet101(weights=tv.ResNet101_Weights.DEFAULT if pretrained else None) 151 | elif(num==152): 152 | self.net = tv.resnet152(weights=tv.ResNet152_Weights.DEFAULT if pretrained else None) 153 | self.N_slices = 5 154 | 155 | self.conv1 = self.net.conv1 156 | self.bn1 = self.net.bn1 157 | self.relu = self.net.relu 158 | self.maxpool = self.net.maxpool 159
| self.layer1 = self.net.layer1 160 | self.layer2 = self.net.layer2 161 | self.layer3 = self.net.layer3 162 | self.layer4 = self.net.layer4 163 | 164 | def forward(self, X): 165 | h = self.conv1(X) 166 | h = self.bn1(h) 167 | h = self.relu(h) 168 | h_relu1 = h 169 | h = self.maxpool(h) 170 | h = self.layer1(h) 171 | h_conv2 = h 172 | h = self.layer2(h) 173 | h_conv3 = h 174 | h = self.layer3(h) 175 | h_conv4 = h 176 | h = self.layer4(h) 177 | h_conv5 = h 178 | 179 | outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5']) 180 | out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5) 181 | 182 | return out 183 | -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/alex.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/alex.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/squeeze.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.0/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.0/vgg.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/alex.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/alex.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/squeeze.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/squeeze.pth -------------------------------------------------------------------------------- /third_parties/lpips/weights/v0.1/vgg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/lpips/weights/v0.1/vgg.pth -------------------------------------------------------------------------------- /third_parties/pytorch3d/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import ops 2 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USTC3DV/D3Human-code/8841b45bc8adb3d0790d68164ffa1f1fee140f5a/third_parties/pytorch3d/__pycache__/ops.cpython-38.pyc -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/knn.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | #include <torch/extension.h> 11 | #include <tuple> 12 | #include "utils/pytorch3d_cutils.h" 13 | #define WITH_CUDA true 14 | 15 | // Compute indices of K nearest neighbors in pointcloud p2 to points 16 | // in pointcloud p1. 17 | // 18 | // Args: 19 | //    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each 20 | //        containing P1 points of dimension D. 21 | //    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each 22 | //        containing P2 points of dimension D. 23 | //    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. 24 | //    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. 25 | //    norm: int specifying the norm for the distance (1 for L1, 2 for L2) 26 | //    K: int giving the number of nearest points to return. 27 | //    version: Integer telling which implementation to use. 28 | // 29 | // Returns: 30 | //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where 31 | //        p1_neighbor_idx[n, i, k] = j means that the kth nearest 32 | //        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. 33 | //        It is padded with zeros so that it can be used easily in a later 34 | //        gather() operation. 35 | // 36 | //    p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared 37 | //        distance from each point p1[n, p, :] to its K neighbors 38 | //        p2[n, p1_neighbor_idx[n, p, k], :]. 39 | 40 | // CPU implementation. 41 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu( 42 | const at::Tensor& p1, 43 | const at::Tensor& p2, 44 | const at::Tensor& lengths1, 45 | const at::Tensor& lengths2, 46 | const int norm, 47 | const int K); 48 | 49 | // CUDA implementation 50 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda( 51 | const at::Tensor& p1, 52 | const at::Tensor& p2, 53 | const at::Tensor& lengths1, 54 | const at::Tensor& lengths2, 55 | const int norm, 56 | const int K, 57 | const int version); 58 | 59 | // Implementation which is exposed. 60 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx( 61 | const at::Tensor& p1, 62 | const at::Tensor& p2, 63 | const at::Tensor& lengths1, 64 | const at::Tensor& lengths2, 65 | const int norm, 66 | const int K, 67 | const int version) { 68 | if (p1.is_cuda() || p2.is_cuda()) { 69 | #ifdef WITH_CUDA 70 | CHECK_CUDA(p1); 71 | CHECK_CUDA(p2); 72 | return KNearestNeighborIdxCuda( 73 | p1, p2, lengths1, lengths2, norm, K, version); 74 | #else 75 | AT_ERROR("Not compiled with GPU support."); 76 | #endif 77 | } 78 | return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K); 79 | } 80 | 81 | // Compute gradients with respect to p1 and p2 82 | // 83 | // Args: 84 | //    p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each 85 | //        containing P1 points of dimension D. 86 | //    p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each 87 | //        containing P2 points of dimension D. 88 | //    lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. 89 | //    lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. 90 | //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where 91 | //        p1_neighbor_idx[n, i, k] = j means that the kth nearest 92 | //        neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. 93 | //        It is padded with zeros so that it can be used easily in a later 94 | //        gather() operation. This is computed from the forward pass. 95 | //    norm: int specifying the norm for the distance (1 for L1, 2 for L2) 96 | //    grad_dists: FloatTensor of shape (N, P1, K) which contains the input 97 | //        gradients. 98 | // 99 | // Returns: 100 | //    grad_p1: FloatTensor of shape (N, P1, D) containing the output gradients 101 | //        wrt p1. 102 | //    grad_p2: FloatTensor of shape (N, P2, D) containing the output gradients 103 | //        wrt p2. 104 | 105 | // CPU implementation. 106 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu( 107 | const at::Tensor& p1, 108 | const at::Tensor& p2, 109 | const at::Tensor& lengths1, 110 | const at::Tensor& lengths2, 111 | const at::Tensor& idxs, 112 | const int norm, 113 | const at::Tensor& grad_dists); 114 | 115 | // CUDA implementation 116 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda( 117 | const at::Tensor& p1, 118 | const at::Tensor& p2, 119 | const at::Tensor& lengths1, 120 | const at::Tensor& lengths2, 121 | const at::Tensor& idxs, 122 | const int norm, 123 | const at::Tensor& grad_dists); 124 | 125 | // Implementation which is exposed. 126 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward( 127 | const at::Tensor& p1, 128 | const at::Tensor& p2, 129 | const at::Tensor& lengths1, 130 | const at::Tensor& lengths2, 131 | const at::Tensor& idxs, 132 | const int norm, 133 | const at::Tensor& grad_dists) { 134 | if (p1.is_cuda() || p2.is_cuda()) { 135 | #ifdef WITH_CUDA 136 | CHECK_CUDA(p1); 137 | CHECK_CUDA(p2); 138 | return KNearestNeighborBackwardCuda( 139 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists); 140 | #else 141 | AT_ERROR("Not compiled with GPU support."); 142 | #endif 143 | } 144 | return KNearestNeighborBackwardCpu( 145 | p1, p2, lengths1, lengths2, idxs, norm, grad_dists); 146 | } 147 | 148 | // Utility to check whether a KNN version can be used. 149 | // 150 | // Args: 151 | //    version: Integer in the range 0 <= version <= 3 indicating one of our 152 | //        KNN implementations. 153 | //    D: Number of dimensions for the input and query point clouds 154 | //    K: Number of neighbors to be found 155 | // 156 | // Returns: 157 | //    Whether the indicated KNN version can be used. 158 | bool KnnCheckVersion(int version, const int64_t D, const int64_t K); 159 | 160 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 161 | m.def("knn_points_idx", &KNearestNeighborIdx); 162 | m.def("knn_points_backward", &KNearestNeighborBackward); 163 | } 164 |
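The two bindings registered above are what `third_parties/pytorch3d/ops.py` drives in this repo. A sketch of calling them directly, assuming the extension has been compiled (the `load` build step and module name here are illustrative, not the repo's actual build path):

```python
import torch
from torch.utils.cpp_extension import load

# Hypothetical ad-hoc JIT build; requires a CUDA toolchain for knn.cu.
_C = load(name="knn_ext", sources=[
    "third_parties/pytorch3d/cuda/knn.cpp",
    "third_parties/pytorch3d/cuda/knn.cu",
    "third_parties/pytorch3d/cuda/knn_cpu.cpp",
])

p1 = torch.randn(2, 128, 3, device="cuda")   # (N, P1, D) query points
p2 = torch.randn(2, 256, 3, device="cuda")   # (N, P2, D) reference points
l1 = torch.full((2,), 128, dtype=torch.int64, device="cuda")
l2 = torch.full((2,), 256, dtype=torch.int64, device="cuda")

# norm=2 -> squared L2 distances; version=-1 lets the dispatcher pick a kernel.
idx, dists = _C.knn_points_idx(p1, p2, l1, l2, 2, 8, -1)
print(idx.shape, dists.shape)                # torch.Size([2, 128, 8]) for both
```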
-------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/knn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include <torch/extension.h> 10 | #include <queue> 11 | #include <tuple> 12 | 13 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu( 14 | const at::Tensor& p1, 15 | const at::Tensor& p2, 16 | const at::Tensor& lengths1, 17 | const at::Tensor& lengths2, 18 | const int norm, 19 | const int K) { 20 | const int N = p1.size(0); 21 | const int P1 = p1.size(1); 22 | const int D = p1.size(2); 23 | 24 | auto long_opts = lengths1.options().dtype(torch::kInt64); 25 | torch::Tensor idxs = torch::full({N, P1, K}, 0, long_opts); 26 | torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options()); 27 | 28 | auto p1_a = p1.accessor<float, 3>(); 29 | auto p2_a = p2.accessor<float, 3>(); 30 | auto lengths1_a = lengths1.accessor<int64_t, 1>(); 31 | auto lengths2_a = lengths2.accessor<int64_t, 1>(); 32 | auto idxs_a = idxs.accessor<int64_t, 3>(); 33 | auto dists_a = dists.accessor<float, 3>(); 34 | 35 | for (int n = 0; n < N; ++n) { 36 | const int64_t length1 = lengths1_a[n]; 37 | const int64_t length2 = lengths2_a[n]; 38 | for (int64_t i1 = 0; i1 < length1; ++i1) { 39 | // Use a priority queue to store (distance, index) tuples. 40 | std::priority_queue<std::tuple<float, int64_t>> q; 41 | for (int64_t i2 = 0; i2 < length2; ++i2) { 42 | float dist = 0; 43 | for (int d = 0; d < D; ++d) { 44 | float diff = p1_a[n][i1][d] - p2_a[n][i2][d]; 45 | if (norm == 1) { 46 | dist += abs(diff); 47 | } else { // norm is 2 (default) 48 | dist += diff * diff; 49 | } 50 | } 51 | int size = static_cast<int>(q.size()); 52 | if (size < K || dist < std::get<0>(q.top())) { 53 | q.emplace(dist, i2); 54 | if (size >= K) { 55 | q.pop(); 56 | } 57 | } 58 | } 59 | while (!q.empty()) { 60 | auto t = q.top(); 61 | q.pop(); 62 | const int k = q.size(); 63 | dists_a[n][i1][k] = std::get<0>(t); 64 | idxs_a[n][i1][k] = std::get<1>(t); 65 | } 66 | } 67 | } 68 | return std::make_tuple(idxs, dists); 69 | } 70 |
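The `priority_queue` in this forward loop is a max-heap keyed on distance: while fewer than K candidates are stored everything is pushed, and afterwards a new point only displaces the current worst. A pure-Python model of that inner loop (illustrative only, not part of the repo):

```python
import heapq

def knn_one_point(q, pts, K, norm=2):
    """Model of the inner loop of KNearestNeighborIdxCpu for one query point."""
    heap = []  # heapq is a min-heap, so store (-dist, -idx): the worst candidate sits on top
    for j, p in enumerate(pts):
        d = sum(abs(a - b) if norm == 1 else (a - b) ** 2 for a, b in zip(q, p))
        if len(heap) < K:
            heapq.heappush(heap, (-d, -j))
        elif d < -heap[0][0]:                 # strictly closer than the current worst
            heapq.heapreplace(heap, (-d, -j))
    out = sorted((-d, -j) for d, j in heap)   # ascending distance, like the pop loop
    return [j for _, j in out], [d for d, _ in out]

idx, dists = knn_one_point((0.0, 0.0), [(1, 0), (0, 2), (0.5, 0.5), (3, 3)], K=2)
print(idx, dists)   # [2, 0] [0.5, 1.0]
```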
71 | // ------------------------------------------------------------- // 72 | //                   Backward Operators                             // 73 | // ------------------------------------------------------------- // 74 | 75 | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu( 76 | const at::Tensor& p1, 77 | const at::Tensor& p2, 78 | const at::Tensor& lengths1, 79 | const at::Tensor& lengths2, 80 | const at::Tensor& idxs, 81 | const int norm, 82 | const at::Tensor& grad_dists) { 83 | const int N = p1.size(0); 84 | const int P1 = p1.size(1); 85 | const int D = p1.size(2); 86 | const int P2 = p2.size(1); 87 | const int K = idxs.size(2); 88 | 89 | torch::Tensor grad_p1 = torch::full({N, P1, D}, 0, p1.options()); 90 | torch::Tensor grad_p2 = torch::full({N, P2, D}, 0, p2.options()); 91 | 92 | auto p1_a = p1.accessor<float, 3>(); 93 | auto p2_a = p2.accessor<float, 3>(); 94 | auto lengths1_a = lengths1.accessor<int64_t, 1>(); 95 | auto lengths2_a = lengths2.accessor<int64_t, 1>(); 96 | auto idxs_a = idxs.accessor<int64_t, 3>(); 97 | auto grad_dists_a = grad_dists.accessor<float, 3>(); 98 | auto grad_p1_a = grad_p1.accessor<float, 3>(); 99 | auto grad_p2_a = grad_p2.accessor<float, 3>(); 100 | 101 | for (int n = 0; n < N; ++n) { 102 | const int64_t length1 = lengths1_a[n]; 103 | int64_t length2 = lengths2_a[n]; 104 | length2 = (length2 < K) ? length2 : K; 105 | for (int64_t i1 = 0; i1 < length1; ++i1) { 106 | for (int64_t k = 0; k < length2; ++k) { 107 | const int64_t i2 = idxs_a[n][i1][k]; 108 | // If the index is the pad value of -1 then ignore it 109 | if (i2 == -1) { 110 | continue; 111 | } 112 | for (int64_t d = 0; d < D; ++d) { 113 | float diff = 0.0; 114 | if (norm == 1) { 115 | float sign = (p1_a[n][i1][d] > p2_a[n][i2][d]) ? 1.0 : -1.0; 116 | diff = grad_dists_a[n][i1][k] * sign; 117 | } else { // norm is 2 (default) 118 | diff = 2.0f * grad_dists_a[n][i1][k] * 119 | (p1_a[n][i1][d] - p2_a[n][i2][d]); 120 | } 121 | grad_p1_a[n][i1][d] += diff; 122 | grad_p2_a[n][i2][d] += -1.0f * diff; 123 | } 124 | } 125 | } 126 | } 127 | return std::make_tuple(grad_p1, grad_p2); 128 | } 129 |
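The backward pass hand-codes the derivative of the squared distance: for the L2 branch it accumulates 2 * grad_dist * (p1 - p2) into grad_p1 and its negation into grad_p2, which is exactly d/dp ||p1 - p2||^2. A two-line autograd cross-check of that formula:

```python
import torch

p1 = torch.randn(3, requires_grad=True)
p2 = torch.randn(3, requires_grad=True)
((p1 - p2) ** 2).sum().backward()
diff = (p1 - p2).detach()
print(torch.allclose(p1.grad, 2 * diff), torch.allclose(p2.grad, -2 * diff))  # True True
```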
-------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/index_utils.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | // This converts dynamic array lookups into static array lookups, for small 10 | // arrays up to size 32. 11 | // 12 | // Suppose we have a small thread-local array: 13 | // 14 | //    float vals[10]; 15 | // 16 | // Ideally we should only index this array using static indices: 17 | // 18 | //    for (int i = 0; i < 10; ++i) vals[i] = i * i; 19 | // 20 | // If we do so, then the CUDA compiler may be able to place the array into 21 | // registers, which can have a big performance improvement. However if we 22 | // access the array dynamically, the compiler may force the array into 23 | // local memory, which has the same latency as global memory. 24 | // 25 | // These functions convert dynamic array access into static array access 26 | // using a brute-force lookup table. It can be used like this: 27 | // 28 | //    float vals[10]; 29 | //    int idx = 3; 30 | //    float val = 3.14f; 31 | //    RegisterIndexUtils<float, 10>::set(vals, idx, val); 32 | //    float val2 = RegisterIndexUtils<float, 10>::get(vals, idx); 33 | // 34 | // The implementation is based on fbcuda/RegisterUtils.cuh: 35 | // https://github.com/facebook/fbcuda/blob/master/RegisterUtils.cuh 36 | // To avoid depending on the entire library, we just reimplement these two 37 | // functions. The fbcuda implementation is a bit more sophisticated, and uses 38 | // the preprocessor to generate switch statements that go up to N for each 39 | // value of N. We are lazy and just have a giant explicit switch statement. 40 | // 41 | // We might be able to use a template metaprogramming approach similar to 42 | // DispatchKernel1D for this. However DispatchKernel1D is intended to be used 43 | // for dispatching to the correct CUDA kernel on the host, while this is 44 | // is intended to run on the device. I was concerned that a metaprogramming 45 | // approach for this might lead to extra function calls at runtime if the 46 | // compiler fails to optimize them away, which could be very slow on device. 47 | // However I didn't actually benchmark or test this.
48 | template <typename T, int N> 49 | struct RegisterIndexUtils { 50 | __device__ __forceinline__ static T get(const T arr[N], int idx) { 51 | if (idx < 0 || idx >= N) 52 | return T(); 53 | switch (idx) { 54 | case 0: 55 | return arr[0]; 56 | case 1: 57 | return arr[1]; 58 | case 2: 59 | return arr[2]; 60 | case 3: 61 | return arr[3]; 62 | case 4: 63 | return arr[4]; 64 | case 5: 65 | return arr[5]; 66 | case 6: 67 | return arr[6]; 68 | case 7: 69 | return arr[7]; 70 | case 8: 71 | return arr[8]; 72 | case 9: 73 | return arr[9]; 74 | case 10: 75 | return arr[10]; 76 | case 11: 77 | return arr[11]; 78 | case 12: 79 | return arr[12]; 80 | case 13: 81 | return arr[13]; 82 | case 14: 83 | return arr[14]; 84 | case 15: 85 | return arr[15]; 86 | case 16: 87 | return arr[16]; 88 | case 17: 89 | return arr[17]; 90 | case 18: 91 | return arr[18]; 92 | case 19: 93 | return arr[19]; 94 | case 20: 95 | return arr[20]; 96 | case 21: 97 | return arr[21]; 98 | case 22: 99 | return arr[22]; 100 | case 23: 101 | return arr[23]; 102 | case 24: 103 | return arr[24]; 104 | case 25: 105 | return arr[25]; 106 | case 26: 107 | return arr[26]; 108 | case 27: 109 | return arr[27]; 110 | case 28: 111 | return arr[28]; 112 | case 29: 113 | return arr[29]; 114 | case 30: 115 | return arr[30]; 116 | case 31: 117 | return arr[31]; 118 | }; 119 | return T(); 120 | } 121 | 122 | __device__ __forceinline__ static void set(T arr[N], int idx, T val) { 123 | if (idx < 0 || idx >= N) 124 | return; 125 | switch (idx) { 126 | case 0: 127 | arr[0] = val; 128 | break; 129 | case 1: 130 | arr[1] = val; 131 | break; 132 | case 2: 133 | arr[2] = val; 134 | break; 135 | case 3: 136 | arr[3] = val; 137 | break; 138 | case 4: 139 | arr[4] = val; 140 | break; 141 | case 5: 142 | arr[5] = val; 143 | break; 144 | case 6: 145 | arr[6] = val; 146 | break; 147 | case 7: 148 | arr[7] = val; 149 | break; 150 | case 8: 151 | arr[8] = val; 152 | break; 153 | case 9: 154 | arr[9] = val; 155 | break; 156 | case 10: 157 | arr[10] = val; 158 | break; 159 | case 11: 160 | arr[11] = val; 161 | break; 162 | case 12: 163 | arr[12] = val; 164 | break; 165 | case 13: 166 | arr[13] = val; 167 | break; 168 | case 14: 169 | arr[14] = val; 170 | break; 171 | case 15: 172 | arr[15] = val; 173 | break; 174 | case 16: 175 | arr[16] = val; 176 | break; 177 | case 17: 178 | arr[17] = val; 179 | break; 180 | case 18: 181 | arr[18] = val; 182 | break; 183 | case 19: 184 | arr[19] = val; 185 | break; 186 | case 20: 187 | arr[20] = val; 188 | break; 189 | case 21: 190 | arr[21] = val; 191 | break; 192 | case 22: 193 | arr[22] = val; 194 | break; 195 | case 23: 196 | arr[23] = val; 197 | break; 198 | case 24: 199 | arr[24] = val; 200 | break; 201 | case 25: 202 | arr[25] = val; 203 | break; 204 | case 26: 205 | arr[26] = val; 206 | break; 207 | case 27: 208 | arr[27] = val; 209 | break; 210 | case 28: 211 | arr[28] = val; 212 | break; 213 | case 29: 214 | arr[29] = val; 215 | break; 216 | case 30: 217 | arr[30] = val; 218 | break; 219 | case 31: 220 | arr[31] = val; 221 | break; 222 | } 223 | } 224 | }; 225 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/mink.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree.
7 | */ 8 | 9 | #pragma once 10 | #define MINK_H 11 | 12 | #include "index_utils.cuh" 13 | 14 | // A data structure to keep track of the smallest K keys seen so far as well 15 | // as their associated values, intended to be used in device code. 16 | // This data structure doesn't allocate any memory; keys and values are stored 17 | // in arrays passed to the constructor. 18 | // 19 | // The implementation is generic; it can be used for any key type that supports 20 | // the < operator, and can be used with any value type. 21 | // 22 | // Example usage: 23 | // 24 | //    float keys[K]; 25 | //    int values[K]; 26 | //    MinK<float, int> mink(keys, values, K); 27 | //    for (...) { 28 | //      // Produce some key and value from somewhere 29 | //      mink.add(key, value); 30 | //    } 31 | //    mink.sort(); 32 | // 33 | // Now keys and values store the smallest K keys seen so far and the values 34 | // associated to these keys: 35 | // 36 | //    for (int k = 0; k < K; ++k) { 37 | //      float key_k = keys[k]; 38 | //      int value_k = values[k]; 39 | //    } 40 | template <typename key_t, typename value_t> 41 | class MinK { 42 | public: 43 | // Constructor. 44 | // 45 | // Arguments: 46 | //    keys: Array in which to store keys 47 | //    values: Array in which to store values 48 | //    K: How many values to keep track of 49 | __device__ MinK(key_t* keys, value_t* vals, int K) 50 | : keys(keys), vals(vals), K(K), _size(0) {} 51 | 52 | // Try to add a new key and associated value to the data structure. If the key 53 | // is one of the smallest K seen so far then it will be kept; otherwise it 54 | // will not be kept. 55 | // 56 | // This takes O(1) operations if the new key is not kept, or if the structure 57 | // currently contains fewer than K elements. Otherwise this takes O(K) time. 58 | // 59 | // Arguments: 60 | //    key: The key to add 61 | //    val: The value associated to the key 62 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) { 63 | if (_size < K) { 64 | keys[_size] = key; 65 | vals[_size] = val; 66 | if (_size == 0 || key > max_key) { 67 | max_key = key; 68 | max_idx = _size; 69 | } 70 | _size++; 71 | } else if (key < max_key) { 72 | keys[max_idx] = key; 73 | vals[max_idx] = val; 74 | max_key = key; 75 | for (int k = 0; k < K; ++k) { 76 | key_t cur_key = keys[k]; 77 | if (cur_key > max_key) { 78 | max_key = cur_key; 79 | max_idx = k; 80 | } 81 | } 82 | } 83 | } 84 | 85 | // Get the number of items currently stored in the structure. 86 | // This takes O(1) time. 87 | __device__ __forceinline__ int size() { 88 | return _size; 89 | } 90 | 91 | // Sort the items stored in the structure using bubble sort. 92 | // This takes O(K^2) time. 93 | __device__ __forceinline__ void sort() { 94 | for (int i = 0; i < _size - 1; ++i) { 95 | for (int j = 0; j < _size - i - 1; ++j) { 96 | if (keys[j + 1] < keys[j]) { 97 | key_t key = keys[j]; 98 | value_t val = vals[j]; 99 | keys[j] = keys[j + 1]; 100 | vals[j] = vals[j + 1]; 101 | keys[j + 1] = key; 102 | vals[j + 1] = val; 103 | } 104 | } 105 | } 106 | } 107 | 108 | private: 109 | key_t* keys; 110 | value_t* vals; 111 | int K; 112 | int _size; 113 | key_t max_key; 114 | int max_idx; 115 | }; 116 |
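Before the register-friendly variant below, it may help to see MinK's add() policy in plain Python: O(1) inserts while the buffer fills, then an O(K) rescan only when a new key evicts the current maximum. (A sketch; the CUDA version caches max_key/max_idx between calls, while this model rescans for brevity.)

```python
class MinKModel:
    """Pure-Python model of MinK.add() (keys: e.g. distances, vals: e.g. indices)."""
    def __init__(self, K):
        self.K, self.keys, self.vals = K, [], []
    def add(self, key, val):
        if len(self.keys) < self.K:
            self.keys.append(key); self.vals.append(val)   # O(1) while not full
        elif key < max(self.keys):
            i = self.keys.index(max(self.keys))            # O(K) rescan, as in the CUDA loop
            self.keys[i], self.vals[i] = key, val

mk = MinKModel(3)
for i, k in enumerate([9.0, 4.0, 7.0, 1.0, 8.0, 2.0]):
    mk.add(k, i)
print(sorted(zip(mk.keys, mk.vals)))   # [(1.0, 3), (2.0, 5), (4.0, 1)]
```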
117 | // This is a version of MinK that only touches the arrays using static indexing 118 | // via RegisterIndexUtils. If the keys and values are stored in thread-local 119 | // arrays, then this may allow the compiler to place them in registers for 120 | // fast access. 121 | // 122 | // This has the same API as MinK, but doesn't support sorting. 123 | // We found that sorting via RegisterIndexUtils gave very poor performance, 124 | // and suspect it may have prevented the compiler from placing the arrays 125 | // into registers. 126 | template <typename key_t, typename value_t, int K> 127 | class RegisterMinK { 128 | public: 129 | __device__ RegisterMinK(key_t* keys, value_t* vals) 130 | : keys(keys), vals(vals), _size(0) {} 131 | 132 | __device__ __forceinline__ void add(const key_t& key, const value_t& val) { 133 | if (_size < K) { 134 | RegisterIndexUtils<key_t, K>::set(keys, _size, key); 135 | RegisterIndexUtils<value_t, K>::set(vals, _size, val); 136 | if (_size == 0 || key > max_key) { 137 | max_key = key; 138 | max_idx = _size; 139 | } 140 | _size++; 141 | } else if (key < max_key) { 142 | RegisterIndexUtils<key_t, K>::set(keys, max_idx, key); 143 | RegisterIndexUtils<value_t, K>::set(vals, max_idx, val); 144 | max_key = key; 145 | for (int k = 0; k < K; ++k) { 146 | key_t cur_key = RegisterIndexUtils<key_t, K>::get(keys, k); 147 | if (cur_key > max_key) { 148 | max_key = cur_key; 149 | max_idx = k; 150 | } 151 | } 152 | } 153 | } 154 | 155 | __device__ __forceinline__ int size() { 156 | return _size; 157 | } 158 | 159 | private: 160 | key_t* keys; 161 | value_t* vals; 162 | int _size; 163 | key_t max_key; 164 | int max_idx; 165 | }; 166 | -------------------------------------------------------------------------------- /third_parties/pytorch3d/cuda/utils/pytorch3d_cutils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | #include <torch/extension.h> 11 | 12 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.") 13 | #define CHECK_CONTIGUOUS(x) \ 14 |   TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.") 15 | #define CHECK_CONTIGUOUS_CUDA(x) \ 16 |   CHECK_CUDA(x);               \ 17 |   CHECK_CONTIGUOUS(x) 18 | --------------------------------------------------------------------------------