├── .gitignore ├── LICENSE ├── NeuralJacobianFields ├── MeshProcessor.py ├── PoissonSystem.py └── SourceMesh.py ├── README.md ├── example_config.yml ├── images ├── bad_orientation.png ├── good_orientation.png ├── hand_to_octopus.gif ├── planck_to_einstein.gif └── spot_to_giraffe.gif ├── loop.py ├── main.py ├── meshes ├── alien.obj ├── bird.obj ├── chair1.obj ├── chair2.obj ├── donkey.obj ├── eiffel_tower.obj ├── fish.obj ├── guitar.obj ├── hand.obj ├── max_planck.obj ├── shoe1.obj ├── shoe2.obj ├── spot.mtl ├── spot.obj ├── vase1.obj ├── vase2.obj └── vase3.obj ├── nvdiffmodeling ├── LICENSE.txt └── src │ ├── material.py │ ├── mesh.py │ ├── obj.py │ ├── regularizer.py │ ├── render.py │ ├── renderutils │ ├── __init__.py │ ├── bsdf.py │ ├── c_src │ │ ├── bsdf.cu │ │ ├── bsdf.h │ │ ├── common.cpp │ │ ├── common.h │ │ ├── loss.cu │ │ ├── loss.h │ │ ├── mesh.cu │ │ ├── mesh.h │ │ ├── normal.cu │ │ ├── normal.h │ │ ├── tensor.h │ │ ├── torch_bindings.cpp │ │ ├── vec3f.h │ │ └── vec4f.h │ ├── loss.py │ ├── ops.py │ └── tests │ │ ├── test_bsdf.py │ │ ├── test_loss.py │ │ ├── test_mesh.py │ │ └── test_perf.py │ ├── texture.py │ └── util.py ├── requirements.txt └── utilities ├── camera.py ├── clip_spatial.py ├── helpers.py ├── resize_right.py └── video.py /.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AnonGit11 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NeuralJacobianFields/SourceMesh.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | import torch 5 | import igl 6 | from . 
import MeshProcessor 7 | WKS_DIM = MeshProcessor.WKS_DIM 8 | WKS_FACTOR = 1000 9 | import numpy as np 10 | import sys 11 | import random 12 | import time 13 | class SourceMesh: 14 | ''' 15 | datastructure for the source mesh to be mapped 16 | ''' 17 | 18 | def __init__(self, source_ind, source_dir, extra_source_fields, 19 | random_scale, ttype, use_wks=False, random_centering=False, 20 | cpuonly=False): 21 | self.__use_wks = use_wks 22 | self.source_ind = source_ind 23 | self.source_dir = source_dir 24 | self.centroids_and_normals = None 25 | self.center_source = True 26 | self.poisson = None 27 | self.__source_global_translation_to_original = 0 28 | self.__extra_keys = extra_source_fields 29 | self.__loaded_data = {} 30 | self.__ttype = ttype 31 | self.__random_scale = random_scale 32 | self.random_centering = random_centering 33 | self.source_mesh_centroid = None 34 | self.mesh_processor = None 35 | self.cpuonly = cpuonly 36 | 37 | def get_vertices(self): 38 | return self.source_vertices 39 | 40 | def get_global_translation_to_original(self): 41 | return self.__source_global_translation_to_original 42 | 43 | def vertices_from_jacobians(self, d): 44 | return self.poisson.solve_poisson(d) 45 | 46 | def jacobians_from_vertices(self, v): 47 | return self.poisson.jacobians_from_vertices(v) 48 | 49 | def restrict_jacobians(self, J): 50 | return self.poisson.restrict_jacobians(J) 51 | 52 | def get_loaded_data(self, key: str): 53 | 54 | return self.__loaded_data.get(key) 55 | 56 | def get_source_triangles(self): 57 | # if self.__source_triangles is None: 58 | # self.__source_triangles = np.load(os.path.join(self.source_dir, 'faces.npy')) 59 | return self.mesh_processor.get_faces() 60 | 61 | def to(self, device): 62 | self.poisson = self.poisson.to(device) 63 | self.centroids_and_normals = self.centroids_and_normals.to(device) 64 | for key in self.__loaded_data.keys(): 65 | self.__loaded_data[key] = self.__loaded_data[key].to(device) 66 | return self 67 | 68 | def __init_from_mesh_data(self): 69 | assert self.mesh_processor is not None 70 | self.mesh_processor.prepare_differential_operators_for_use(self.__ttype) #call 1 71 | self.source_vertices = torch.from_numpy(self.mesh_processor.get_vertices()).type( 72 | self.__ttype) 73 | if self.__random_scale != 1: 74 | print("Diff ops and WKS need to be multiplied accordingly. 
Not implemented for now")
 75 |             sys.exit()
 76 |         self.source_vertices *= self.__random_scale
 77 | 
 78 |         bb = igl.bounding_box(self.source_vertices.numpy())[0]
 79 |         diag = igl.bounding_box_diagonal(self.source_vertices.numpy())
 80 | 
 81 |         # self.source_mesh_centroid = torch.mean(self.source_vertices, axis=0)
 82 |         self.source_mesh_centroid = (bb[0] + bb[-1])/2
 83 |         if self.random_centering:
 84 |             # centering augmentation
 85 |             self.source_mesh_centroid = self.source_mesh_centroid + [(2*random.random() - 1)*diag*0.2, (2*random.random() - 1)*diag*0.2, (2*random.random() - 1)*diag*0.2]
 86 |         # self.source_mesh_centroid = (bb[0] + bb[-1])/2 - np.array([-0.00033245, -0.2910367 , 0.02100835])
 87 | 
 88 |         # Load input to NJF MLP
 89 |         # start = time.time()
 90 |         centroids = self.mesh_processor.get_centroids()
 91 |         centroid_points_and_normals = centroids.points_and_normals
 92 |         if self.__use_wks:
 93 |             wks = WKS_FACTOR * centroids.wks
 94 |             centroid_points_and_normals = numpy.hstack((centroid_points_and_normals, wks))
 95 |         self.centroids_and_normals = torch.from_numpy(
 96 |             centroid_points_and_normals).type(self.__ttype)
 97 |         if self.center_source:
 98 |             c = self.source_mesh_centroid
 99 |             self.centroids_and_normals[:, 0:3] -= c
100 |             self.source_vertices -= c
101 |             self.__source_global_translation_to_original = c
102 |         self.poisson = self.mesh_processor.diff_ops.poisson_solver
103 | 
104 | 
105 |         # Essentially here we load pointnet data and apply the same preprocessing
106 |         for key in self.__extra_keys:
107 |             data = self.mesh_processor.get_data(key)
108 |             # if data is None:  # not found in mesh data so try loading from disk
109 |             #     data = np.load(os.path.join(self.source_dir, key + ".npy"))
110 |             data = torch.from_numpy(data)
111 |             if key == 'samples':
112 |                 if self.center_source:
113 |                     data -= self.get_mesh_centroid()
114 |                 scale = self.__random_scale
115 |                 data *= scale
116 |                 data = data.unsqueeze(0).type(self.__ttype)
117 | 
118 |             self.__loaded_data[key] = data
119 |         # print("Elapsed load source mesh ", time.time() - start)
120 | 
121 |     def load(self, source_v=None, source_f=None):
122 |         # mesh_data = SourceMeshData.SourceMeshData.meshprocessor_from_file(self.source_dir)
123 |         if source_v is not None and source_f is not None:
124 |             self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_array(source_v, source_f, self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
125 |         else:
126 |             if os.path.isdir(self.source_dir):
127 |                 self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_directory(self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
128 |             else:
129 |                 self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_file(self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
130 |         self.__init_from_mesh_data()
131 | 
132 |     def get_point_dim(self):
133 |         return self.centroids_and_normals.shape[1]
134 | 
135 |     def get_centroids_and_normals(self):
136 |         return self.centroids_and_normals
137 | 
138 |     def get_mesh_centroid(self):
139 |         return self.source_mesh_centroid
140 | 
141 |     def pin_memory(self):
142 |         # self.poisson.pin_memory()
143 |         # self.centroids_and_normals.pin_memory()
144 |         # self.source_vertices.pin_memory()
145 |         # for key in self.__loaded_data.keys():
146 |         #     self.__loaded_data[key].pin_memory()
147 |         return self
148 | 
149 | 
150 | 
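# Usage sketch (illustrative only, not part of the original pipeline): SourceMesh
# supports a jacobian round-trip -- load a mesh, extract per-face jacobians,
# modify them, then Poisson-solve back to vertex positions. The constructor call
# mirrors the one in loop.py; the path here is an assumption for the example.
#
#   src = SourceMesh(0, 'outputs/tmp/mesh.obj', {}, 1, ttype=torch.float)
#   src.load()                                        # builds the MeshProcessor and Poisson solver
#   v = src.get_vertices().unsqueeze(0)               # (1, V, 3) batched vertices
#   J = src.jacobians_from_vertices(v)                # (1, F, 3, 3) per-face jacobians
#   v_rec = src.vertices_from_jacobians(J).squeeze()  # least-squares solve back to vertices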
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TextDeformer [[Project Page](https://threedle.github.io/TextDeformer/)]
 2 | [![arXiv](https://img.shields.io/badge/arXiv-TextDeformer-b31b1b.svg)](https://arxiv.org/abs/2304.13348)
 3 | 
 4 | ![alt](images/planck_to_einstein.gif)
 5 | ## Installation
 6 | 
 7 |     conda create -y -n TextDeformer python=3.9
 8 |     conda activate TextDeformer
 9 |     pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
10 |     conda install -y -c conda-forge igl
11 |     pip install -r requirements.txt
12 | 
13 | ## Usage
14 | **NOTE:** This repository **requires** a GPU to run.
15 | 
16 | ### Run examples
17 | ``main.py`` is the primary script to use. You may pass arguments using the ``--config`` flag, which takes the path to a ``.yml`` file. See ``example_config.yml`` for an example. Alternatively, you may pass command line arguments manually; these override the values provided by the config file. Example usage:
18 | 
19 |     # Use all arguments provided by the example config
20 |     python main.py --config example_config.yml
21 | 
22 |     # Change the optimized mesh to hand.obj, and change the base and target text prompts
23 |     python main.py --config example_config.yml --mesh meshes/hand.obj --text_prompt 'an octopus' --base_text_prompt 'a hand'
24 | 
25 |     # Now, increase the batch size, learning rate, and the training resolution
26 |     python main.py --config example_config.yml --mesh meshes/hand.obj --text_prompt 'an octopus' \
27 |     --base_text_prompt 'a hand' --batch_size 50 --lr 0.005 --train_res 1024
28 | 
29 | ### Outputs
30 | Outputs are saved to the path specified in the run configuration, which is ``./outputs`` by default. The output folder contains several subfolders: ``images`` contains intermittently saved samples of the rendered images passed to CLIP, ``logs`` contains tensorboard logs of the optimization process, and ``mesh_best_clip``, ``mesh_best_total``, and ``mesh_final`` contain the optimized mesh at the best CLIP score, the best total loss, and the final epoch, respectively. The configuration file is also saved at ``config.yml``, and a video of the optimization process is saved at ``video_log.mp4``.
31 | 
32 | ### Common bugs
33 | #### Mesh Orientation
34 | Due to how the camera angles are sampled, the input mesh may need to be re-oriented to perform well. You can check your mesh's orientation by inspecting ``images/epoch_0.png``.
35 | ![alt](images/bad_orientation.png)
36 | 
37 | *The Eiffel Tower mesh is oriented poorly, so CLIP sees the mesh rendered from strange viewpoints*
38 | 
39 | ![alt](images/good_orientation.png)
40 | 
41 | *Here, we re-orient the mesh so that CLIP sees the side profile of the tower*
42 | 
43 | #### Jacobian temp files
44 | The ``NeuralJacobianFields`` code in this repository creates several temporary files in ``outputs/tmp``. If these temporary files already exist, the code reads the existing files instead of overwriting them. This can cause stale results if you run multiple examples with the same output path, intending to overwrite the output folder. A workaround is shown below.
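To force a clean rebuild, one simple workaround (assuming the default ``./outputs`` output path) is to delete the cache before re-running; ``loop.py`` recreates the directory and regenerates the temporary mesh and precomputation files on the next run:

    # Force the cached Jacobian/Poisson precomputation to be rebuilt
    rm -rf outputs/tmp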
45 | 
46 | ## Citation
47 | ```
48 | @InProceedings{Gao_2023_SIGGRAPH,
49 |     author = {Gao, William and Aigerman, Noam and Groueix, Thibault and Kim, Vladimir and Hanocka, Rana},
50 |     title = {TextDeformer: Geometry Manipulation using Text Guidance},
51 |     booktitle = {ACM Transactions on Graphics (SIGGRAPH)},
52 |     year = {2023},
53 | }
54 | ```
--------------------------------------------------------------------------------
/example_config.yml:
--------------------------------------------------------------------------------
 1 | output_path: ./outputs
 2 | gpu: 0
 3 | seed: 99
 4 | 
 5 | # CLIP-related
 6 | text_prompt: a giraffe
 7 | base_text_prompt: a cow
 8 | clip_model: ViT-B/32
 9 | consistency_clip_model: ViT-B/32
10 | consistency_vit_stride: 8
11 | consistency_vit_layer: 11
12 | 
13 | # Mesh
14 | mesh: ./meshes/spot.obj
15 | retriangulate: 0
16 | 
17 | # Render settings
18 | bsdf: diffuse
19 | 
20 | # Hyper-parameters
21 | lr: 0.0025
22 | epochs: 2500
23 | clip_weight: 1.0
24 | delta_clip_weight: 1.0
25 | regularize_jacobians_weight: 0.5
26 | consistency_loss_weight: 0.5
27 | consistency_elev_filter: 30
28 | consistency_azim_filter: 20
29 | batch_size: 25
30 | train_res: 512
31 | resize_method: cubic
32 | 
33 | # Camera parameters
34 | fov_min: 30.0
35 | fov_max: 90.0
36 | dist_min: 2.5
37 | dist_max: 3.5
38 | light_power: 5.0
39 | elev_alpha: 1.0
40 | elev_beta: 5.0
41 | elev_max: 60.0
42 | azim_min: 0.0
43 | azim_max: 360.0
44 | aug_loc: 1
45 | aug_light: 1
46 | aug_bkg: 1
47 | adapt_dist: 1
48 | 
49 | log_interval: 5
50 | log_interval_im: 150
51 | log_elev: 30.0
52 | log_fov: 60.0
53 | log_dist: 3.0
54 | log_res: 512
55 | log_light_power: 3.0
--------------------------------------------------------------------------------
/images/bad_orientation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/bad_orientation.png
--------------------------------------------------------------------------------
/images/good_orientation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/good_orientation.png
--------------------------------------------------------------------------------
/images/hand_to_octopus.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/hand_to_octopus.gif
--------------------------------------------------------------------------------
/images/planck_to_einstein.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/planck_to_einstein.gif
--------------------------------------------------------------------------------
/images/spot_to_giraffe.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/spot_to_giraffe.gif
--------------------------------------------------------------------------------
/loop.py:
--------------------------------------------------------------------------------
  1 | import clip
  2 | import kornia
  3 | import os
  4 | import pathlib
  5 | import pymeshlab
  6 | import shutil
  7 | import torch
  8 | import 
torchvision
  9 | import logging
 10 | import yaml
 11 | 
 12 | import numpy as np
 13 | import nvdiffrast.torch as dr
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | from easydict import EasyDict
 17 | 
 18 | from NeuralJacobianFields import SourceMesh
 19 | 
 20 | from nvdiffmodeling.src import obj
 21 | from nvdiffmodeling.src import util
 22 | from nvdiffmodeling.src import mesh
 23 | from nvdiffmodeling.src import render
 24 | from nvdiffmodeling.src import texture
 25 | from nvdiffmodeling.src import regularizer
 26 | 
 27 | from PIL import Image
 28 | from torch.utils.tensorboard import SummaryWriter
 29 | from tqdm import tqdm
 30 | 
 31 | from utilities.video import Video
 32 | from utilities.helpers import cosine_avg, create_scene, get_vp_map
 33 | from utilities.camera import CameraBatch, get_camera_params
 34 | from utilities.clip_spatial import CLIPVisualEncoder
 35 | from utilities.resize_right import resize, cubic, linear, lanczos2, lanczos3
 36 | 
 37 | def loop(cfg):
 38 |     output_path = pathlib.Path(cfg['output_path'])
 39 |     os.makedirs(output_path, exist_ok=True)
 40 |     with open(output_path / 'config.yml', 'w') as f:
 41 |         yaml.dump(cfg, f, default_flow_style=False)
 42 |     cfg = EasyDict(cfg)
 43 | 
 44 |     print(f'Output directory {cfg.output_path} created')
 45 | 
 46 |     device = torch.device(f'cuda:{cfg.gpu}')
 47 |     torch.cuda.set_device(device)
 48 | 
 49 |     print('Loading CLIP Models')
 50 |     model, _ = clip.load(cfg.clip_model, device=device)
 51 |     fe = CLIPVisualEncoder(cfg.consistency_clip_model, cfg.consistency_vit_stride, device)
 52 | 
 53 |     clip_mean = torch.tensor([0.48145466, 0.45782750, 0.40821073], device=device)
 54 |     clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device)
 55 | 
 56 |     # output video
 57 |     video = Video(cfg.output_path)
 58 | 
 59 |     # GL Context
 60 |     glctx = dr.RasterizeGLContext()
 61 | 
 62 |     print(f'Target text prompt is {cfg.text_prompt}')
 63 |     print(f'Base text prompt is {cfg.base_text_prompt}')
 64 |     with torch.no_grad():
 65 |         text_embeds = clip.tokenize(cfg.text_prompt).to(device)
 66 |         base_text_embeds = clip.tokenize(cfg.base_text_prompt).to(device)
 67 |         text_embeds = model.encode_text(text_embeds).detach()
 68 |         target_text_embeds = text_embeds.clone() / text_embeds.norm(dim=1, keepdim=True)
 69 | 
 70 |         delta_text_embeds = text_embeds - model.encode_text(base_text_embeds)
 71 |         delta_text_embeds = delta_text_embeds / delta_text_embeds.norm(dim=1, keepdim=True)
 72 | 
 73 |     os.makedirs(output_path / 'tmp', exist_ok=True)
 74 |     ms = pymeshlab.MeshSet()
 75 |     ms.load_new_mesh(cfg.mesh)
 76 | 
 77 |     if cfg.retriangulate:
 78 |         print('Retriangulating shape')
 79 |         ms.meshing_isotropic_explicit_remeshing()
 80 | 
 81 |     if not ms.current_mesh().has_wedge_tex_coord():
 82 |         # some arbitrarily high number
 83 |         ms.compute_texcoord_parametrization_triangle_trivial_per_wedge(textdim=10000)
 84 | 
 85 |     ms.save_current_mesh(str(output_path / 'tmp' / 'mesh.obj'))
 86 | 
 87 |     load_mesh = obj.load_obj(str(output_path / 'tmp' / 'mesh.obj'))
 88 |     load_mesh = mesh.unit_size(load_mesh)
 89 | 
 90 |     ms.add_mesh(pymeshlab.Mesh(vertex_matrix=load_mesh.v_pos.cpu().numpy(), face_matrix=load_mesh.t_pos_idx.cpu().numpy()))
 91 |     ms.save_current_mesh(str(output_path / 'tmp' / 'mesh.obj'), save_vertex_color=False)
 92 | 
 93 |     # TODO: Need these for rendering even if we don't optimize textures
 94 |     texture_map = texture.create_trainable(np.random.uniform(size=[512]*2 + [3], low=0.0, high=1.0), [512]*2, True)
 95 |     normal_map = texture.create_trainable(np.array([0, 0, 1]), [512]*2, True)
 96 |     specular_map = 
texture.create_trainable(np.array([0, 0, 0]), [512]*2, True) 97 | 98 | load_mesh = mesh.Mesh( 99 | material={ 100 | 'bsdf': cfg.bsdf, 101 | 'kd': texture_map, 102 | 'ks': specular_map, 103 | 'normal': normal_map, 104 | }, 105 | base=load_mesh # Get UVs from original loaded mesh 106 | ) 107 | 108 | jacobian_source = SourceMesh.SourceMesh(0, str(output_path / 'tmp' / 'mesh.obj'), {}, 1, ttype=torch.float) 109 | if len(list((output_path / 'tmp').glob('*.npz'))) > 0: 110 | logging.warn(f'Using existing Jacobian .npz files in {str(output_path)}/tmp/ ! Please check if this is intentional.') 111 | jacobian_source.load() 112 | jacobian_source.to(device) 113 | 114 | with torch.no_grad(): 115 | gt_jacobians = jacobian_source.jacobians_from_vertices(load_mesh.v_pos.unsqueeze(0)) 116 | gt_jacobians.requires_grad_(True) 117 | 118 | optimizer = torch.optim.Adam([gt_jacobians], lr=cfg.lr) 119 | cams_data = CameraBatch( 120 | cfg.train_res, 121 | [cfg.dist_min, cfg.dist_max], 122 | [cfg.azim_min, cfg.azim_max], 123 | [cfg.elev_alpha, cfg.elev_beta, cfg.elev_max], 124 | [cfg.fov_min, cfg.fov_max], 125 | cfg.aug_loc, 126 | cfg.aug_light, 127 | cfg.aug_bkg, 128 | cfg.batch_size, 129 | rand_solid=True 130 | ) 131 | cams = torch.utils.data.DataLoader(cams_data, cfg.batch_size, num_workers=0, pin_memory=True) 132 | best_losses = {'CLIP': np.inf, 'total': np.inf} 133 | 134 | for out_type in ['final', 'best_clip', 'best_total']: 135 | os.makedirs(output_path / f'mesh_{out_type}', exist_ok=True) 136 | os.makedirs(output_path / 'images', exist_ok=True) 137 | logger = SummaryWriter(str(output_path / 'logs')) 138 | 139 | rot_ang = 0.0 140 | t_loop = tqdm(range(cfg.epochs), leave=False) 141 | 142 | if cfg.resize_method == 'cubic': 143 | resize_method = cubic 144 | elif cfg.resize_method == 'linear': 145 | resize_method = linear 146 | elif cfg.resize_method == 'lanczos2': 147 | resize_method = lanczos2 148 | elif cfg.resize_method == 'lanczos3': 149 | resize_method = lanczos3 150 | 151 | for it in t_loop: 152 | 153 | # updated vertices from jacobians 154 | n_vert = jacobian_source.vertices_from_jacobians(gt_jacobians).squeeze() 155 | 156 | # TODO: More texture code required to make it work ... 
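        # The trainable kd / ks / normal maps are low-pass filtered with a 7x7
        # gaussian blur each iteration before being packed into the render material.
        # Note that the blurred kd below is computed but unused in the final mesh:
        # the material takes kd_notex, a constant 0.5 gray, presumably so the CLIP
        # losses respond to the deformed geometry rather than to texture detail.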
157 | ready_texture = texture.Texture2D( 158 | kornia.filters.gaussian_blur2d( 159 | load_mesh.material['kd'].data.permute(0, 3, 1, 2), 160 | kernel_size=(7, 7), 161 | sigma=(3, 3), 162 | ).permute(0, 2, 3, 1).contiguous() 163 | ) 164 | 165 | kd_notex = texture.Texture2D(torch.full_like(ready_texture.data, 0.5)) 166 | 167 | ready_specular = texture.Texture2D( 168 | kornia.filters.gaussian_blur2d( 169 | load_mesh.material['ks'].data.permute(0, 3, 1, 2), 170 | kernel_size=(7, 7), 171 | sigma=(3, 3), 172 | ).permute(0, 2, 3, 1).contiguous() 173 | ) 174 | 175 | ready_normal = texture.Texture2D( 176 | kornia.filters.gaussian_blur2d( 177 | load_mesh.material['normal'].data.permute(0, 3, 1, 2), 178 | kernel_size=(7, 7), 179 | sigma=(3, 3), 180 | ).permute(0, 2, 3, 1).contiguous() 181 | ) 182 | 183 | # Final mesh 184 | m = mesh.Mesh( 185 | n_vert, 186 | load_mesh.t_pos_idx, 187 | material={ 188 | 'bsdf': cfg.bsdf, 189 | 'kd': kd_notex, 190 | 'ks': ready_specular, 191 | 'normal': ready_normal, 192 | }, 193 | base=load_mesh # gets uvs etc from here 194 | ) 195 | 196 | render_mesh = create_scene([m.eval()], sz=512) 197 | if it == 0: 198 | base_mesh = render_mesh.clone() 199 | base_mesh = mesh.auto_normals(base_mesh) 200 | base_mesh = mesh.compute_tangents(base_mesh) 201 | render_mesh = mesh.auto_normals(render_mesh) 202 | render_mesh = mesh.compute_tangents(render_mesh) 203 | 204 | # Logging mesh 205 | if it % cfg.log_interval == 0: 206 | with torch.no_grad(): 207 | params = get_camera_params( 208 | cfg.log_elev, 209 | rot_ang, 210 | cfg.log_dist, 211 | cfg.log_res, 212 | cfg.log_fov, 213 | ) 214 | rot_ang += 1 215 | log_mesh = mesh.unit_size(render_mesh.eval(params)) 216 | log_image = render.render_mesh( 217 | glctx, 218 | log_mesh, 219 | params['mvp'], 220 | params['campos'], 221 | params['lightpos'], 222 | cfg.log_light_power, 223 | cfg.log_res, 224 | 1, 225 | background=torch.ones(1, cfg.log_res, cfg.log_res, 3).to(device) 226 | ) 227 | 228 | log_image = video.ready_image(log_image) 229 | logger.add_mesh('predicted_mesh', vertices=log_mesh.v_pos.unsqueeze(0), faces=log_mesh.t_pos_idx.unsqueeze(0), global_step=it) 230 | 231 | if cfg.adapt_dist and it > 0: 232 | with torch.no_grad(): 233 | v_pos = m.v_pos.clone() 234 | vmin = v_pos.amin(dim=0) 235 | vmax = v_pos.amax(dim=0) 236 | v_pos -= (vmin + vmax) / 2 237 | mult = torch.cat([v_pos.amin(dim=0), v_pos.amax(dim=0)]).abs().amax().cpu() 238 | cams.dataset.dist_min = cfg.dist_min * mult 239 | cams.dataset.dist_max = cfg.dist_max * mult 240 | 241 | params_camera = next(iter(cams)) 242 | for key in params_camera: 243 | params_camera[key] = params_camera[key].to(device) 244 | 245 | final_mesh = render_mesh.eval(params_camera) 246 | 247 | train_render = render.render_mesh( 248 | glctx, 249 | final_mesh, 250 | params_camera['mvp'], 251 | params_camera['campos'], 252 | params_camera['lightpos'], 253 | cfg.light_power, 254 | cfg.train_res, 255 | spp=1, 256 | num_layers=1, 257 | msaa=False, 258 | background=params_camera['bkgs'] 259 | ).permute(0, 3, 1, 2) 260 | train_render = resize(train_render, out_shape=(224, 224), interp_method=resize_method) 261 | 262 | train_rast_map = render.render_mesh( 263 | glctx, 264 | final_mesh, 265 | params_camera['mvp'], 266 | params_camera['campos'], 267 | params_camera['lightpos'], 268 | cfg.light_power, 269 | cfg.train_res, 270 | spp=1, 271 | num_layers=1, 272 | msaa=False, 273 | background=params_camera['bkgs'], 274 | return_rast_map=True 275 | ) 276 | 277 | if it == 0: 278 | params_camera = next(iter(cams)) 279 | for 
key in params_camera: 280 | params_camera[key] = params_camera[key].to(device) 281 | base_render = render.render_mesh( 282 | glctx, 283 | base_mesh.eval(params_camera), 284 | params_camera['mvp'], 285 | params_camera['campos'], 286 | params_camera['lightpos'], 287 | cfg.light_power, 288 | cfg.train_res, 289 | spp=1, 290 | num_layers=1, 291 | msaa=False, 292 | background=params_camera['bkgs'], 293 | ).permute(0, 3, 1, 2) 294 | base_render = resize(base_render, out_shape=(224, 224), interp_method=resize_method) 295 | 296 | if it % cfg.log_interval_im == 0: 297 | log_idx = torch.randperm(cfg.batch_size)[:5] 298 | s_log = train_render[log_idx, :, :, :] 299 | s_log = torchvision.utils.make_grid(s_log) 300 | ndarr = s_log.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy() 301 | im = Image.fromarray(ndarr) 302 | im.save(str(output_path / 'images' / f'epoch_{it}.png')) 303 | 304 | obj.write_obj( 305 | str(output_path / 'mesh_final'), 306 | m.eval() 307 | ) 308 | 309 | optimizer.zero_grad() 310 | 311 | # CLIP similarity losses 312 | normalized_clip_render = (train_render - clip_mean[None, :, None, None]) / clip_std[None, :, None, None] 313 | image_embeds = model.encode_image( 314 | normalized_clip_render 315 | ) 316 | with torch.no_grad(): 317 | normalized_base_render = (base_render - clip_mean[None, :, None, None]) / clip_std[None, :, None, None] 318 | base_embeds = model.encode_image(normalized_base_render) 319 | 320 | orig_image_embeds = image_embeds.clone() / image_embeds.norm(dim=1, keepdim=True) 321 | delta_image_embeds = image_embeds - base_embeds 322 | delta_image_embeds = delta_image_embeds / delta_image_embeds.norm(dim=1, keepdim=True) 323 | 324 | clip_loss = cosine_avg(orig_image_embeds, target_text_embeds) 325 | delta_clip_loss = cosine_avg(delta_image_embeds, delta_text_embeds) 326 | logger.add_scalar('clip_loss', clip_loss, global_step=it) 327 | logger.add_scalar('delta_clip_loss', delta_clip_loss, global_step=it) 328 | 329 | # Jacobian regularization 330 | r_loss = (((gt_jacobians) - torch.eye(3, 3, device=device)) ** 2).mean() 331 | logger.add_scalar('jacobian_regularization', r_loss, global_step=it) 332 | 333 | # Consistency loss 334 | # Get mapping from vertex to pixels 335 | curr_vp_map = get_vp_map(final_mesh.v_pos, params_camera['mvp'], 224) 336 | for idx, rast_faces in enumerate(train_rast_map[:, :, :, 3].view(cfg.batch_size, -1)): 337 | u_faces = rast_faces.unique().long()[1:] - 1 338 | t = torch.arange(len(final_mesh.v_pos), device=device) 339 | u_ret = torch.cat([t, final_mesh.t_pos_idx[u_faces].flatten()]).unique(return_counts=True) 340 | non_verts = u_ret[0][u_ret[1] < 2] 341 | curr_vp_map[idx][non_verts] = torch.tensor([224, 224], device=device) 342 | 343 | # Get mapping from vertex to patch 344 | med = (fe.old_stride - 1) / 2 345 | curr_vp_map[curr_vp_map < med] = med 346 | curr_vp_map[(curr_vp_map > 224 - fe.old_stride) & (curr_vp_map < 224)] = 223 - med 347 | curr_patch_map = ((curr_vp_map - med) / fe.new_stride).round() 348 | flat_patch_map = curr_patch_map[..., 0] * (((224 - fe.old_stride) / fe.new_stride) + 1) + curr_patch_map[..., 1] 349 | 350 | # Deep features 351 | patch_feats = fe(normalized_clip_render) 352 | flat_patch_map[flat_patch_map > patch_feats[0].shape[-1] - 1] = patch_feats[0].shape[-1] 353 | flat_patch_map = flat_patch_map.long()[:, None, :].repeat(1, patch_feats[0].shape[1], 1) 354 | 355 | deep_feats = patch_feats[cfg.consistency_vit_layer] 356 | deep_feats = torch.nn.functional.pad(deep_feats, (0, 1)) 357 | 
deep_feats = torch.gather(deep_feats, dim=2, index=flat_patch_map) 358 | deep_feats = torch.nn.functional.normalize(deep_feats, dim=1, eps=1e-6) 359 | 360 | elev_d = torch.cdist(params_camera['elev'].unsqueeze(1), params_camera['elev'].unsqueeze(1)).abs() < torch.deg2rad(torch.tensor(cfg.consistency_elev_filter)) 361 | azim_d = torch.cdist(params_camera['azim'].unsqueeze(1), params_camera['azim'].unsqueeze(1)).abs() < torch.deg2rad(torch.tensor(cfg.consistency_azim_filter)) 362 | 363 | cosines = torch.einsum('ijk, lkj -> ilk', deep_feats, deep_feats.permute(0, 2, 1)) 364 | cosines = (cosines * azim_d.unsqueeze(-1) * elev_d.unsqueeze(-1)).permute(2, 0, 1).triu(1) 365 | consistency_loss = cosines[cosines != 0].mean() 366 | logger.add_scalar('consistency_loss', consistency_loss, global_step=it) 367 | 368 | total_loss = cfg.clip_weight * clip_loss + cfg.delta_clip_weight * delta_clip_loss + \ 369 | cfg.regularize_jacobians_weight * r_loss - cfg.consistency_loss_weight * consistency_loss 370 | logger.add_scalar('total_loss', total_loss, global_step=it) 371 | 372 | if best_losses['total'] > total_loss: 373 | best_losses['total'] = total_loss.detach() 374 | obj.write_obj( 375 | str(output_path / 'mesh_best_total'), 376 | m.eval() 377 | ) 378 | if best_losses['CLIP'] > clip_loss: 379 | best_losses['CLIP'] = clip_loss.detach() 380 | obj.write_obj( 381 | str(output_path / 'mesh_best_clip'), 382 | m.eval() 383 | ) 384 | 385 | total_loss.backward() 386 | optimizer.step() 387 | t_loop.set_description(f'CLIP Loss = {clip_loss.item()}, Total Loss = {total_loss.item()}') 388 | 389 | video.close() 390 | obj.write_obj( 391 | str(output_path / 'mesh_final'), 392 | m.eval() 393 | ) 394 | 395 | return 396 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import torch 4 | import random 5 | import argparse 6 | import numpy as np 7 | 8 | from loop import loop 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--config', help='Path to config file', type=str, default='./example_config.yml') 13 | parser.add_argument('--output_path', help='Output directory (will be created)', type=str, default=argparse.SUPPRESS) 14 | parser.add_argument('--gpu', help='GPU index', type=int, default=argparse.SUPPRESS) 15 | parser.add_argument('--seed', help='Random seed', type=int, default=argparse.SUPPRESS) 16 | 17 | # CLIP-related 18 | parser.add_argument('--text_prompt', help='Target text prompt', type=str, default=argparse.SUPPRESS) 19 | parser.add_argument('--base_text_prompt', help='Base text prompt describing input mesh', type=str, default=argparse.SUPPRESS) 20 | parser.add_argument('--clip_model', help='CLIP Model for text comparison', type=str, default=argparse.SUPPRESS) 21 | parser.add_argument('--consistency_clip_model', help='CLIP Model for consistency', type=str, default=argparse.SUPPRESS) 22 | parser.add_argument('--consistency_vit_stride', help='New stride for ViT patch interpolation', type=int, default=argparse.SUPPRESS) 23 | parser.add_argument('--consistency_vit_layer', help='Which layer to take ViT patch features from (0-11)', type=int, default=argparse.SUPPRESS) 24 | 25 | # Mesh 26 | parser.add_argument('--mesh', help='Path to input mesh', type=str, default=argparse.SUPPRESS) 27 | parser.add_argument('--retriangulate', help='Use isotropic remeshing', type=int, default=argparse.SUPPRESS, choices=[0, 1]) 28 | 29 | # Render 
settings
30 |     parser.add_argument('--bsdf', help='Render technique', type=str, default=argparse.SUPPRESS, choices=['diffuse', 'pbr'])
31 | 
32 |     # Hyper-parameters
33 |     parser.add_argument('--lr', help='Learning rate', type=float, default=argparse.SUPPRESS)
34 |     parser.add_argument('--epochs', help='Number of optimization steps', type=int, default=argparse.SUPPRESS)
35 |     parser.add_argument('--clip_weight', help='Weight for CLIP loss', type=float, default=argparse.SUPPRESS)
36 |     parser.add_argument('--delta_clip_weight', help='Weight for delta-CLIP loss', type=float, default=argparse.SUPPRESS)
37 |     parser.add_argument('--regularize_jacobians_weight', help='Weight for jacobian regularization', type=float, default=argparse.SUPPRESS)
38 |     parser.add_argument('--consistency_loss_weight', help='Weight for viewpoint consistency penalty', type=float, default=argparse.SUPPRESS)
39 |     parser.add_argument('--consistency_elev_filter', help='Elev. angle threshold for filtering out pairs of viewpoints for consistency loss', type=float, default=argparse.SUPPRESS)
40 |     parser.add_argument('--consistency_azim_filter', help='Azim. angle threshold for filtering out pairs of viewpoints for consistency loss', type=float, default=argparse.SUPPRESS)
41 |     parser.add_argument('--batch_size', help='Number of images rendered at the same time', type=int, default=argparse.SUPPRESS)
42 |     parser.add_argument('--train_res', help='Resolution of render before downscaling to CLIP size', type=int, default=argparse.SUPPRESS)
43 |     parser.add_argument('--resize_method', help='Image downsampling/upsampling method', type=str, default=argparse.SUPPRESS, choices=['cubic', 'linear', 'lanczos2', 'lanczos3'])
44 |     ## Camera Parameters ##
45 |     parser.add_argument('--fov_min', help='Minimum camera field of view angle during renders', type=float, default=argparse.SUPPRESS)
46 |     parser.add_argument('--fov_max', help='Maximum camera field of view angle during renders', type=float, default=argparse.SUPPRESS)
47 |     parser.add_argument('--dist_min', help='Minimum distance of camera from mesh during renders', type=float, default=argparse.SUPPRESS)
48 |     parser.add_argument('--dist_max', help='Maximum distance of camera from mesh during renders', type=float, default=argparse.SUPPRESS)
49 |     parser.add_argument('--light_power', help='Light intensity', type=float, default=argparse.SUPPRESS)
50 |     parser.add_argument('--elev_alpha', help='Alpha parameter for Beta distribution for elevation sampling', type=float, default=argparse.SUPPRESS)
51 |     parser.add_argument('--elev_beta', help='Beta parameter for Beta distribution for elevation sampling', type=float, default=argparse.SUPPRESS)
52 |     parser.add_argument('--elev_max', help='Maximum elevation angle in degrees', type=float, default=argparse.SUPPRESS)
53 |     parser.add_argument('--azim_min', help='Minimum azimuth angle in degrees', type=float, default=argparse.SUPPRESS)
54 |     parser.add_argument('--azim_max', help='Maximum azimuth angle in degrees', type=float, default=argparse.SUPPRESS)
55 |     parser.add_argument('--aug_loc', help='Offset mesh from center of image?', type=int, default=argparse.SUPPRESS, choices=[0, 1])
56 |     parser.add_argument('--aug_light', help='Augment the direction of light around the camera', type=int, default=argparse.SUPPRESS, choices=[0, 1])
57 |     parser.add_argument('--aug_bkg', help='Augment the background', type=int, default=argparse.SUPPRESS, choices=[0, 1])
58 |     parser.add_argument('--adapt_dist', help='Adjust camera distance to account for scale of shape', type=int, 
default=argparse.SUPPRESS, choices=[0, 1]) 59 | 60 | # Logging 61 | parser.add_argument('--log_interval', help='Interval for logging, every X epochs', type=int, default=argparse.SUPPRESS) 62 | parser.add_argument('--log_interval_im', help='Interval for logging renders image, every X epochs', type=int, default=argparse.SUPPRESS) 63 | parser.add_argument('--log_elev', help='Logging elevation angle', type=float, default=argparse.SUPPRESS) 64 | parser.add_argument('--log_fov', help='Logging field of view', type=float, default=argparse.SUPPRESS) 65 | parser.add_argument('--log_dist', help='Logging distance from object', type=float, default=argparse.SUPPRESS) 66 | parser.add_argument('--log_res', help='Logging render resolution', type=int, default=argparse.SUPPRESS) 67 | parser.add_argument('--log_light_power', help='Light intensity for logging', type=float, default=argparse.SUPPRESS) 68 | 69 | args = parser.parse_args() 70 | if args.config is not None: 71 | with open(args.config, 'r') as f: 72 | try: 73 | cfg = yaml.safe_load(f) 74 | except yaml.YAMLError as e: 75 | print(e) 76 | 77 | for key in vars(args): 78 | cfg[key] = vars(args)[key] 79 | 80 | print(yaml.dump(cfg, default_flow_style=False)) 81 | random.seed(cfg['seed']) 82 | os.environ['PYTHONHASHSEED'] = str(cfg['seed']) 83 | np.random.seed(cfg['seed']) 84 | torch.manual_seed(cfg['seed']) 85 | torch.cuda.manual_seed(cfg['seed']) 86 | torch.backends.cudnn.deterministic = True 87 | 88 | loop(cfg) 89 | print('Done') 90 | 91 | if __name__ == '__main__': 92 | main() 93 | 94 | -------------------------------------------------------------------------------- /meshes/spot.mtl: -------------------------------------------------------------------------------- 1 | 2 | # Blender MTL File: 'None' 3 | # Material Count: 1 4 | 5 | newmtl Default_OBJ 6 | Ns 250.000000 7 | Ka 1.000000 1.000000 1.000000 8 | Kd 0.800000 0.800000 0.800000 9 | Ks 0.500000 0.500000 0.500000 10 | Ke 0.000000 0.000000 0.000000 11 | Ni 1.450000 12 | d 1.000000 13 | illum 2 14 | -------------------------------------------------------------------------------- /nvdiffmodeling/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, NVIDIA Corporation. All rights reserved. 2 | 3 | 4 | Nvidia Source Code License (1-Way Commercial) 5 | 6 | ======================================================================= 7 | 8 | 1. Definitions 9 | 10 | "Licensor" means any person or entity that distributes its Work. 11 | 12 | "Software" means the original work of authorship made available under 13 | this License. 14 | 15 | "Work" means the Software and any additions to or derivative works of 16 | the Software that are made available under this License. 17 | 18 | The terms "reproduce," "reproduction," "derivative works," and 19 | "distribution" have the meaning as provided under U.S. copyright law; 20 | provided, however, that for the purposes of this License, derivative 21 | works shall not include works that remain separable from, or merely 22 | link (or bind by name) to the interfaces of, the Work. 23 | 24 | Works, including the Software, are "made available" under this License 25 | by including in or with the Work either (a) a copyright notice 26 | referencing the applicability of this License to the Work, or (b) a 27 | copy of this License. 28 | 29 | 2. License Grants 30 | 31 | 2.1 Copyright Grant. 
Subject to the terms and conditions of this 32 | License, each Licensor grants to you a perpetual, worldwide, 33 | non-exclusive, royalty-free, copyright license to reproduce, 34 | prepare derivative works of, publicly display, publicly perform, 35 | sublicense and distribute its Work and any resulting derivative 36 | works in any form. 37 | 38 | 3. Limitations 39 | 40 | 3.1 Redistribution. You may reproduce or distribute the Work only 41 | if (a) you do so under this License, (b) you include a complete 42 | copy of this License with your distribution, and (c) you retain 43 | without modification any copyright, patent, trademark, or 44 | attribution notices that are present in the Work. 45 | 46 | 3.2 Derivative Works. You may specify that additional or different 47 | terms apply to the use, reproduction, and distribution of your 48 | derivative works of the Work ("Your Terms") only if (a) Your Terms 49 | provide that the use limitation in Section 3.3 applies to your 50 | derivative works, and (b) you identify the specific derivative 51 | works that are subject to Your Terms. Notwithstanding Your Terms, 52 | this License (including the redistribution requirements in Section 53 | 3.1) will continue to apply to the Work itself. 54 | 55 | 3.3 Use Limitation. The Work and any derivative works thereof only 56 | may be used or intended for use non-commercially. The Work or 57 | derivative works thereof may be used or intended for use by Nvidia 58 | or its affiliates commercially or non-commercially. As used herein, 59 | "non-commercially" means for research or evaluation purposes only 60 | and not for any direct or indirect monetary gain. 61 | 62 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim 63 | against any Licensor (including any claim, cross-claim or 64 | counterclaim in a lawsuit) to enforce any patents that you allege 65 | are infringed by any Work, then your rights under this License from 66 | such Licensor (including the grant in Section 2.1) will terminate 67 | immediately. 68 | 69 | 3.5 Trademarks. This License does not grant any rights to use any 70 | Licensor's or its affiliates' names, logos, or trademarks, except 71 | as necessary to reproduce the notices described in this License. 72 | 73 | 3.6 Termination. If you violate any term of this License, then your 74 | rights under this License (including the grant in Section 2.1) will 75 | terminate immediately. 76 | 77 | 4. Disclaimer of Warranty. 78 | 79 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY 80 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF 81 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR 82 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER 83 | THIS LICENSE. 84 | 85 | 5. Limitation of Liability. 86 | 87 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL 88 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE 89 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, 90 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF 91 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK 92 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, 93 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER 94 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF 95 | THE POSSIBILITY OF SUCH DAMAGES. 
96 | 97 | ======================================================================= 98 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/material.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | 13 | from . import util 14 | from . import texture 15 | from . import mesh 16 | 17 | ###################################################################################### 18 | # .mtl material format loading / storing 19 | ###################################################################################### 20 | 21 | def load_mtl(fn, clear_ks=True): 22 | import re 23 | mtl_path = os.path.dirname(fn) 24 | 25 | # Read file 26 | with open(fn) as f: 27 | lines = f.readlines() 28 | 29 | # Parse materials 30 | materials = [] 31 | for line in lines: 32 | split_line = re.split(' +|\t+|\n+', line.strip()) 33 | prefix = split_line[0].lower() 34 | data = split_line[1:] 35 | if 'newmtl' in prefix: 36 | material = {'name' : data[0]} 37 | materials += [material] 38 | elif materials: 39 | if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix: 40 | material[prefix] = data[0] 41 | else: 42 | material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda') 43 | 44 | # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps. So replace constants with 1x1 maps 45 | for mat in materials: 46 | if not 'bsdf' in mat: 47 | mat['bsdf'] = 'pbr' 48 | 49 | if 'map_kd' in mat: 50 | mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd'])) 51 | else: 52 | mat['kd'] = texture.Texture2D(mat['kd']) 53 | 54 | if 'map_ks' in mat: 55 | mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3) 56 | else: 57 | mat['ks'] = texture.Texture2D(mat['ks']) 58 | 59 | if 'bump' in mat: 60 | mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3) 61 | 62 | # Convert Kd from sRGB to linear RGB 63 | mat['kd'] = texture.srgb_to_rgb(mat['kd']) 64 | 65 | if clear_ks: 66 | # Override ORM occlusion (red) channel by zeros. 
We hijack this channel
 67 |         for mip in mat['ks'].getMips():
 68 |             mip[..., 0] = 0.0
 69 | 
 70 |     return materials
 71 | 
 72 | def save_mtl(fn, material):
 73 |     folder = os.path.dirname(fn)
 74 |     with open(fn, "w") as f:
 75 |         f.write('newmtl defaultMat\n')
 76 |         if material is not None:
 77 |             f.write('bsdf %s\n' % material['bsdf'])
 78 |             f.write('map_kd texture_kd.png\n')
 79 |             texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
 80 |             f.write('map_ks texture_ks.png\n')
 81 |             texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
 82 |             f.write('bump texture_n.png\n')
 83 |             texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(x+1)*0.5)
 84 |         else:
 85 |             f.write('Kd 1 1 1\n')
 86 |             f.write('Ks 0 0 0\n')
 87 |             f.write('Ka 0 0 0\n')
 88 |             f.write('Tf 1 1 1\n')
 89 |             f.write('Ni 1\n')
 90 |             f.write('Ns 0\n')
 91 | 
 92 | ######################################################################################
 93 | # Merge multiple materials into a single uber-material
 94 | ######################################################################################
 95 | 
 96 | def _upscale_replicate(x, full_res):
 97 |     x = x.permute(0, 3, 1, 2)
 98 |     x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
 99 |     return x.permute(0, 2, 3, 1).contiguous()
100 | 
101 | def merge_materials(materials, texcoords, tfaces, mfaces):
102 |     assert len(materials) > 0
103 |     for mat in materials:
104 |         assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
105 |         assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"
106 | 
107 |     uber_material = {
108 |         'name' : 'uber_material',
109 |         'bsdf' : materials[0]['bsdf'],
110 |     }
111 | 
112 |     textures = ['kd', 'ks', 'normal']
113 | 
114 |     # Find maximum texture resolution across all materials and textures
115 |     max_res = None
116 |     for mat in materials:
117 |         for tex in textures:
118 |             tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
119 |             max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res
120 | 
121 |     # Compute size of compound texture and round up to nearest PoT
122 |     full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)
123 | 
124 |     # Normalize texture resolution across all materials & combine into a single large texture
125 |     for tex in textures:
126 |         if tex in materials[0]:
127 |             tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2) # Lay out all textures horizontally, NHWC so dim2 is x
128 |             tex_data = _upscale_replicate(tex_data, full_res)
129 |             uber_material[tex] = texture.Texture2D(tex_data)
130 | 
131 |     # Compute scaling values for used / unused texture area
132 |     s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]
133 | 
134 |     # Recompute texture coordinates to coincide with new composite texture
135 |     new_tverts = {}
136 |     new_tverts_data = []
137 |     for fi in range(len(tfaces)):
138 |         matIdx = mfaces[fi]
139 |         for vi in range(3):
140 |             ti = tfaces[fi][vi]
141 |             if not (ti in new_tverts):
142 |                 new_tverts[ti] = {}
143 |             if not (matIdx in new_tverts[ti]): # create new vertex
144 |                 new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]]) # Offset texture coordinate (x direction) by material id & scale to local space. 
Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here 145 | new_tverts[ti][matIdx] = len(new_tverts_data) - 1 146 | tfaces[fi][vi] = new_tverts[ti][matIdx] # reindex vertex 147 | 148 | return uber_material, new_tverts_data, tfaces 149 | 150 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/obj.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | 13 | from . import util 14 | from . import texture 15 | from . import mesh 16 | from . import material 17 | 18 | ###################################################################################### 19 | # Utility functions 20 | ###################################################################################### 21 | 22 | def _write_weights(folder, mesh): 23 | if mesh.v_weights is not None: 24 | file = os.path.join(folder, 'mesh.weights') 25 | np.save(file, mesh.v_weights.detach().cpu().numpy()) 26 | 27 | def _write_bones(folder, mesh): 28 | if mesh.bone_mtx is not None: 29 | file = os.path.join(folder, 'mesh.bones') 30 | np.save(file, mesh.bone_mtx.detach().cpu().numpy()) 31 | 32 | def _find_mat(materials, name): 33 | for mat in materials: 34 | if mat['name'] == name: 35 | return mat 36 | return materials[0] # Materials 0 is the default 37 | 38 | ###################################################################################### 39 | # Create mesh object from objfile 40 | ###################################################################################### 41 | 42 | def load_obj(filename, clear_ks=True, mtl_override=None): 43 | obj_path = os.path.dirname(filename) 44 | 45 | # Read entire file 46 | with open(filename) as f: 47 | lines = f.readlines() 48 | 49 | # Load materials 50 | all_materials = [ 51 | { 52 | 'name' : '_default_mat', 53 | 'bsdf' : 'falcor', 54 | 'kd' : texture.Texture2D(torch.tensor([0.5, 0.5, 0.5], dtype=torch.float32, device='cuda')), 55 | 'ks' : texture.Texture2D(torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32, device='cuda')) 56 | } 57 | ] 58 | if mtl_override is None: 59 | for line in lines: 60 | if len(line.split()) == 0: 61 | continue 62 | if line.split()[0] == 'mtllib': 63 | all_materials += material.load_mtl(os.path.join(obj_path, line.split()[1]), clear_ks) # Read in entire material library 64 | else: 65 | all_materials += material.load_mtl(mtl_override) 66 | 67 | # load vertices 68 | vertices, texcoords, normals = [], [], [] 69 | for line in lines: 70 | if len(line.split()) == 0: 71 | continue 72 | 73 | prefix = line.split()[0].lower() 74 | if prefix == 'v': 75 | vertices.append([float(v) for v in line.split()[1:]][:3]) 76 | elif prefix == 'vt': 77 | val = [float(v) for v in line.split()[1:]] 78 | texcoords.append([val[0], 1.0 - val[1]]) 79 | elif prefix == 'vn': 80 | normals.append([float(v) for v in line.split()[1:]]) 81 | 82 | # load faces 83 | activeMatIdx = None 84 | used_materials = [] 85 | faces, tfaces, nfaces, mfaces = [], [], [], [] 86 | for line in lines: 87 | if 
len(line.split()) == 0: 88 | continue 89 | 90 | prefix = line.split()[0].lower() 91 | if prefix == 'usemtl': # Track used materials 92 | mat = _find_mat(all_materials, line.split()[1]) 93 | if not mat in used_materials: 94 | used_materials.append(mat) 95 | activeMatIdx = used_materials.index(mat) 96 | elif prefix == 'f': # Parse face 97 | vs = line.split()[1:] 98 | nv = len(vs) 99 | vv = vs[0].split('/') 100 | v0 = int(vv[0]) - 1 101 | if len(vv) > 1: 102 | t0 = int(vv[1]) - 1 if vv[1] != "" else -1 103 | n0 = int(vv[2]) - 1 if vv[2] != "" else -1 104 | else: 105 | t0 = -1 106 | n0 = -1 107 | for i in range(nv - 2): # Triangulate polygons 108 | vv = vs[i + 1].split('/') 109 | v1 = int(vv[0]) - 1 110 | if len(vv) > 1: 111 | t1 = int(vv[1]) - 1 if vv[1] != "" else -1 112 | n1 = int(vv[2]) - 1 if vv[2] != "" else -1 113 | else: 114 | t1 = -1 115 | n1 = -1 116 | vv = vs[i + 2].split('/') 117 | v2 = int(vv[0]) - 1 118 | if len(vv) > 1: 119 | t2 = int(vv[1]) - 1 if vv[1] != "" else -1 120 | n2 = int(vv[2]) - 1 if vv[2] != "" else -1 121 | else: 122 | t2 = -1 123 | n2 = -1 124 | mfaces.append(activeMatIdx) 125 | faces.append([v0, v1, v2]) 126 | tfaces.append([t0, t1, t2]) 127 | nfaces.append([n0, n1, n2]) 128 | assert len(tfaces) == len(faces) and len(nfaces) == len (faces) 129 | 130 | # Create an "uber" material by combining all textures into a larger texture 131 | if len(used_materials) > 1: 132 | uber_material, texcoords, tfaces = material.merge_materials(used_materials, texcoords, tfaces, mfaces) 133 | elif len(used_materials) == 1: 134 | uber_material = used_materials[0] 135 | else: 136 | uber_material = None 137 | 138 | vertices = torch.tensor(vertices, dtype=torch.float32, device='cuda') 139 | texcoords = torch.tensor(texcoords, dtype=torch.float32, device='cuda') if len(texcoords) > 0 else None 140 | normals = torch.tensor(normals, dtype=torch.float32, device='cuda') if len(normals) > 0 else None 141 | 142 | faces = torch.tensor(faces, dtype=torch.int64, device='cuda') 143 | tfaces = torch.tensor(tfaces, dtype=torch.int64, device='cuda') if texcoords is not None else None 144 | nfaces = torch.tensor(nfaces, dtype=torch.int64, device='cuda') if normals is not None else None 145 | 146 | # Read weights and bones if available 147 | try: 148 | v_weights = torch.tensor(np.load(os.path.splitext(filename)[0] + ".weights.npy"), dtype=torch.float32, device='cuda') 149 | bone_mtx = torch.tensor(np.load(os.path.splitext(filename)[0] + ".bones.npy"), dtype=torch.float32, device='cuda') 150 | except: 151 | v_weights, bone_mtx = None, None 152 | 153 | return mesh.Mesh(vertices, faces, normals, nfaces, texcoords, tfaces, v_weights=v_weights, bone_mtx=bone_mtx, material=uber_material) 154 | 155 | ###################################################################################### 156 | # Save mesh object to objfile 157 | ###################################################################################### 158 | 159 | def write_obj(folder, mesh, verbose=True): 160 | obj_file = os.path.join(folder, 'mesh.obj') 161 | if verbose: 162 | print("Writing mesh: ", obj_file) 163 | with open(obj_file, "w") as f: 164 | f.write("mtllib mesh.mtl\n") 165 | f.write("g default\n") 166 | 167 | v_pos = mesh.v_pos.detach().cpu().numpy() if mesh.v_pos is not None else None 168 | v_nrm = mesh.v_nrm.detach().cpu().numpy() if mesh.v_nrm is not None else None 169 | v_tex = mesh.v_tex.detach().cpu().numpy() if mesh.v_tex is not None else None 170 | 171 | t_pos_idx = mesh.t_pos_idx.detach().cpu().numpy() if mesh.t_pos_idx is not 
None else None
172 | t_nrm_idx = mesh.t_nrm_idx.detach().cpu().numpy() if mesh.t_nrm_idx is not None else None
173 | t_tex_idx = mesh.t_tex_idx.detach().cpu().numpy() if mesh.t_tex_idx is not None else None
174 | if verbose:
175 | print(" writing %d vertices" % len(v_pos))
176 | for v in v_pos:
177 | f.write('v {} {} {} \n'.format(v[0], v[1], v[2]))
178 |
179 | if v_tex is not None:
180 | if verbose:
181 | print(" writing %d texcoords" % len(v_tex))
182 | assert(len(t_pos_idx) == len(t_tex_idx))
183 | for v in v_tex:
184 | f.write('vt {} {} \n'.format(v[0], 1.0 - v[1]))
185 |
186 | if v_nrm is not None:
187 | if verbose:
188 | print(" writing %d normals" % len(v_nrm))
189 | assert(len(t_pos_idx) == len(t_nrm_idx))
190 | for v in v_nrm:
191 | f.write('vn {} {} {}\n'.format(v[0], v[1], v[2]))
192 |
193 | # faces
194 | f.write("s 1 \n")
195 | f.write("g pMesh1\n")
196 | f.write("usemtl defaultMat\n")
197 |
198 | # Write faces
199 | if verbose:
200 | print(" writing %d faces" % len(t_pos_idx))
201 | for i in range(len(t_pos_idx)):
202 | f.write("f ")
203 | for j in range(3):
204 | f.write(' %s/%s/%s' % (str(t_pos_idx[i][j]+1), '' if v_tex is None else str(t_tex_idx[i][j]+1), '' if v_nrm is None else str(t_nrm_idx[i][j]+1)))
205 | f.write("\n")
206 |
207 | mtl_file = os.path.join(folder, 'mesh.mtl')
208 | if verbose:
209 | print("Writing material: ", mtl_file)
210 | material.save_mtl(mtl_file, mesh.material)
211 |
212 | _write_weights(folder, mesh)
213 | _write_bones(folder, mesh)
214 | if verbose:
215 | print("Done exporting mesh")
216 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/regularizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import os
10 | import numpy as np
11 | import torch
12 |
13 | from . import util
14 | from . import texture
15 |
16 | ######################################################################################
17 | # Computes the average edge length of a mesh.
18 | # Rough estimate of the tessellation of a mesh. Can be used e.g.
to clamp gradients
19 | ######################################################################################
20 | def avg_edge_length(opt_mesh):
21 | with torch.no_grad():
22 | opt_mesh = opt_mesh.eval()
23 | nVerts = opt_mesh.v_pos.shape[0]
24 | t_pos_idx = opt_mesh.t_pos_idx.detach().cpu().numpy()
25 |
26 | # Find unique edges
27 | ix_i = []
28 | ix_j = []
29 | edge_verts = {}
30 | for tri in t_pos_idx:
31 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
32 | if (i1, i0) not in edge_verts.keys():
33 | edge_verts[(i0, i1)] = True
34 | ix_i += [i0]
35 | ix_j += [i1]
36 |
37 | # Setup torch tensors
38 | ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
39 | ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
40 |
41 | # Gather edge vertex pairs
42 | x_i = opt_mesh.v_pos[ix_i, :]
43 | x_j = opt_mesh.v_pos[ix_j, :]
44 |
45 | # Compute edge lengths (Euclidean norm per edge, not per component)
46 | term = torch.sqrt(torch.sum((x_j - x_i)**2, dim=-1))
47 |
48 | # Compute avg edge length
49 | return (torch.sum(term) / len(x_i)).item()
50 |
51 | ######################################################################################
52 | # Edge length regularizer
53 | ######################################################################################
54 | def edge_length_regularizer(mesh):
55 | class mesh_op_edge_length_regularizer:
56 | def __init__(self, mesh):
57 | self.mesh = mesh
58 |
59 | mesh = mesh.eval()
60 | nVerts = mesh.v_pos.shape[0]
61 | t_pos_idx = mesh.t_pos_idx.detach().cpu().numpy()
62 |
63 | # Find unique edges
64 | ix_i = []
65 | ix_j = []
66 | edge_verts = {}
67 | for tri in t_pos_idx:
68 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
69 | if (i1, i0) not in edge_verts.keys():
70 | edge_verts[(i0, i1)] = True
71 | ix_i += [i0]
72 | ix_j += [i1]
73 |
74 | # Setup torch tensors
75 | self.ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
76 | self.ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
77 |
78 | def eval(self, params={}):
79 | mesh = self.mesh.eval(params)
80 |
81 | # Gather edge vertex pairs
82 | x_i = mesh.v_pos[self.ix_i, :]
83 | x_j = mesh.v_pos[self.ix_j, :]
84 |
85 | # Compute edge lengths (the epsilon keeps the sqrt differentiable at zero)
86 | term = torch.sqrt(torch.sum((x_j - x_i)**2, dim=-1) + 1e-20)
87 |
88 | # Penalize the variance of the edge lengths
89 | return torch.var(term)
90 |
91 | return mesh_op_edge_length_regularizer(mesh)
92 |
93 | ######################################################################################
94 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun).
95 | # https://mgarland.org/class/geom04/material/smoothing.pdf
96 | ######################################################################################
97 | def laplace_regularizer_const(opt_mesh, base_mesh=None):
98 | class mesh_op_laplace_regularizer_const:
99 | def __init__(self, opt_mesh, base_mesh):
100 | self.inputs = [opt_mesh, base_mesh]
101 |
102 | opt_mesh = opt_mesh.eval()
103 | self.nVerts = opt_mesh.v_pos.shape[0]
104 | t_pos_idx = opt_mesh.t_pos_idx.detach().cpu().numpy()
105 |
106 | # Build vertex neighbor rings
107 | vtx_n = [[] for _ in range(self.nVerts)]
108 | for tri in t_pos_idx:
109 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
110 | vtx_n[i0].append(i1)
111 |
112 | # Collect index/weight pairs to compute each Laplacian vector for each vertex.
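# For the uniform weights used here, a vertex i with m one-ring neighbors j
# gets w_ij = 1/m, so the umbrella operator below evaluates to
# L(x_i) = (1/m) * sum_j (x_j - x_i), i.e. the offset from x_i to the
# centroid of its neighbors.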
113 | # Similar notation to https://mgarland.org/class/geom04/material/smoothing.pdf
114 | ix_j, ix_i, w_ij = [], [], []
115 | for i in range(self.nVerts):
116 | m = len(vtx_n[i])
117 | ix_i += [i] * m
118 | ix_j += vtx_n[i]
119 | w_ij += [1.0 / m] * m
120 |
121 | # Setup torch tensors
122 | self.ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
123 | self.ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
124 | self.w_ij = torch.tensor(w_ij, dtype=torch.float32, device='cuda')[:, None]
125 |
126 | def eval(self, params={}):
127 | opt_mesh = self.inputs[0].eval(params)
128 | base_mesh = self.inputs[1].eval(params) if self.inputs[1] is not None else None
129 |
130 | # Regularize differences to the base mesh if given, else absolute positions (see paper)
131 | if base_mesh is not None:
132 | v_pos = opt_mesh.v_pos - base_mesh.v_pos
133 | else:
134 | v_pos = opt_mesh.v_pos
135 |
136 | # Gather edge vertex pairs
137 | x_i = v_pos[self.ix_i, :]
138 | x_j = v_pos[self.ix_j, :]
139 |
140 | # Compute Laplacian differences: (x_j - x_i) * w_ij
141 | term = (x_j - x_i) * self.w_ij
142 |
143 | # Sum everything
144 | term = util.segment_sum(term, self.ix_i)
145 |
146 | return torch.mean(term**2)
147 |
148 | return mesh_op_laplace_regularizer_const(opt_mesh, base_mesh)
149 |
150 | ######################################################################################
151 | # Curvature-based regularizer
152 | ######################################################################################
153 | def face_normal_regularizer(opt_mesh):
154 | class mesh_op_face_normal_regularizer:
155 | def __init__(self, opt_mesh):
156 | self.input = opt_mesh
157 |
158 | imesh = opt_mesh.eval()
159 | self.nVerts = imesh.v_pos.shape[0]
160 | t_pos_idx = imesh.t_pos_idx.detach().cpu().numpy()
161 |
162 | # Generate edge lists
163 | edge_tris = {}
164 | for tri_idx, tri in enumerate(t_pos_idx):
165 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
166 | if (i1, i0) in edge_tris.keys():
167 | edge_tris[(i1, i0)] += [tri_idx]
168 | else:
169 | edge_tris[(i0, i1)] = [tri_idx]
170 |
171 | # Get all good edges with 2 incident triangles
172 | shared_edge_idx = []
173 | for edge in edge_tris.values():
174 | if len(edge) == 2:
175 | shared_edge_idx += [edge]
176 | self.edge_tri_idx = torch.tensor(shared_edge_idx, dtype=torch.int64, device='cuda')
177 |
178 | def eval(self, params={}):
179 | imesh = self.input.eval(params)
180 |
181 | # Compute face normals
182 | v0 = imesh.v_pos[imesh.t_pos_idx[:, 0], :]
183 | v1 = imesh.v_pos[imesh.t_pos_idx[:, 1], :]
184 | v2 = imesh.v_pos[imesh.t_pos_idx[:, 2], :]
185 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
186 |
187 | # Fetch normals for both faces sharing an edge
188 | n0 = face_normals[self.edge_tri_idx[:, 0], :]
189 | n1 = face_normals[self.edge_tri_idx[:, 1], :]
190 |
191 | # Compute error metric based on normal difference
192 | term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
193 | term = (1.0 - term) * 0.5
194 |
195 | return torch.mean(torch.abs(term))
196 |
197 | return mesh_op_face_normal_regularizer(opt_mesh)
198 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/render.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn.functional as F 12 | import nvdiffrast.torch as dr 13 | 14 | from . import util 15 | from . import mesh 16 | from . import renderutils as ru 17 | 18 | # ============================================================================================== 19 | # Helper functions 20 | # ============================================================================================== 21 | def interpolate(attr, rast, attr_idx, rast_db=None): 22 | return dr.interpolate(attr.contiguous(), rast, attr_idx, rast_db=rast_db, diff_attrs=None if rast_db is None else 'all') 23 | 24 | # ============================================================================================== 25 | # pixel shader 26 | # ============================================================================================== 27 | def shade( 28 | gb_pos, 29 | gb_geometric_normal, 30 | gb_normal, 31 | gb_tangent, 32 | gb_texc, 33 | gb_texc_deriv, 34 | view_pos, 35 | light_pos, 36 | light_power, 37 | material, 38 | min_roughness 39 | ): 40 | 41 | ################################################################################ 42 | # Texture lookups 43 | ################################################################################ 44 | 45 | kd = material['kd'].sample(gb_texc, gb_texc_deriv) 46 | ks = material['ks'].sample(gb_texc, gb_texc_deriv)[..., 0:3] # skip alpha 47 | perturbed_nrm = None 48 | if 'normal' in material: 49 | perturbed_nrm = material['normal'].sample(gb_texc, gb_texc_deriv) 50 | 51 | gb_normal = ru.prepare_shading_normal(gb_pos, view_pos, perturbed_nrm, gb_normal, gb_tangent, gb_geometric_normal, two_sided_shading=True, opengl=True) 52 | 53 | # Separate kd into alpha and color, default alpha = 1 54 | alpha = kd[..., 3:4] if kd.shape[-1] == 4 else torch.ones_like(kd[..., 0:1]) 55 | kd = kd[..., 0:3] 56 | 57 | ################################################################################ 58 | # Evaluate BSDF 59 | ################################################################################ 60 | 61 | assert 'bsdf' in material, "Material must specify a BSDF type" 62 | if material['bsdf'] == 'pbr': 63 | shaded_col = ru.pbr_bsdf(kd, ks, gb_pos, gb_normal, view_pos, light_pos, min_roughness) * light_power 64 | elif material['bsdf'] == 'diffuse': 65 | shaded_col = kd * ru.lambert(gb_normal, util.safe_normalize(light_pos - gb_pos)) * light_power 66 | elif material['bsdf'] == 'normal': 67 | shaded_col = (gb_normal + 1.0)*0.5 68 | elif material['bsdf'] == 'tangent': 69 | shaded_col = (gb_tangent + 1.0)*0.5 70 | else: 71 | assert False, "Invalid BSDF '%s'" % material['bsdf'] 72 | 73 | out = torch.cat((shaded_col, alpha), dim=-1) 74 | 75 | return out 76 | 77 | # ============================================================================================== 78 | # Render a depth slice of the mesh (scene), some limitations: 79 | # - Single mesh 80 | # - Single light 81 | # - Single material 82 | # ============================================================================================== 83 | def render_layer( 84 | rast, 85 | rast_deriv, 86 | mesh, 87 | view_pos, 88 | light_pos, 89 | 
light_power,
90 | resolution,
91 | min_roughness,
92 | spp,
93 | msaa
94 | ):
95 |
96 | full_res = resolution*spp
97 |
98 | ################################################################################
99 | # Rasterize
100 | ################################################################################
101 |
102 | # Scale down to shading resolution when MSAA is enabled, otherwise shade at full resolution
103 | if spp > 1 and msaa:
104 | rast_out_s = util.scale_img_nhwc(rast, [resolution, resolution], mag='nearest', min='nearest')
105 | rast_out_deriv_s = util.scale_img_nhwc(rast_deriv, [resolution, resolution], mag='nearest', min='nearest') * spp
106 | else:
107 | rast_out_s = rast
108 | rast_out_deriv_s = rast_deriv
109 |
110 | ################################################################################
111 | # Interpolate attributes
112 | ################################################################################
113 |
114 | # Interpolate world space position
115 | gb_pos, _ = interpolate(mesh.v_pos[None, ...], rast_out_s, mesh.t_pos_idx.int())
116 |
117 | # Compute geometric normals. We need these for the bent normals trick (for bump mapping)
118 | v0 = mesh.v_pos[mesh.t_pos_idx[:, 0], :]
119 | v1 = mesh.v_pos[mesh.t_pos_idx[:, 1], :]
120 | v2 = mesh.v_pos[mesh.t_pos_idx[:, 2], :]
121 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
122 | face_normal_indices = (torch.arange(0, face_normals.shape[0], dtype=torch.int64, device='cuda')[:, None]).repeat(1, 3)
123 | gb_geometric_normal, _ = interpolate(face_normals[None, ...], rast_out_s, face_normal_indices.int())
124 |
125 | # Compute tangent space
126 | assert mesh.v_nrm is not None and mesh.v_tng is not None
127 | gb_normal, _ = interpolate(mesh.v_nrm[None, ...], rast_out_s, mesh.t_nrm_idx.int())
128 | gb_tangent, _ = interpolate(mesh.v_tng[None, ...], rast_out_s, mesh.t_tng_idx.int()) # Interpolate tangents
129 |
130 | # Texture coordinates
131 | assert mesh.v_tex is not None
132 | gb_texc, gb_texc_deriv = interpolate(mesh.v_tex[None, ...], rast_out_s, mesh.t_tex_idx.int(), rast_db=rast_out_deriv_s)
133 |
134 | ################################################################################
135 | # Shade
136 | ################################################################################
137 |
138 | color = shade(gb_pos, gb_geometric_normal, gb_normal, gb_tangent, gb_texc, gb_texc_deriv,
139 | view_pos, light_pos, light_power, mesh.material, min_roughness)
140 |
141 | ################################################################################
142 | # Prepare output
143 | ################################################################################
144 |
145 | # Scale back up to visibility resolution if using MSAA
146 | if spp > 1 and msaa:
147 | color = util.scale_img_nhwc(color, [full_res, full_res], mag='nearest', min='nearest')
148 |
149 | # Return color & raster output for peeling
150 | return color
151 |
152 |
153 | # ==============================================================================================
154 | # Render a depth peeled mesh (scene), some limitations:
155 | # - Single mesh
156 | # - Single light
157 | # - Single material
158 | # ==============================================================================================
159 | def render_mesh(
160 | ctx,
161 | mesh,
162 | mtx_in,
163 | view_pos,
164 | light_pos,
165 | light_power,
166 | resolution,
167 | spp = 1,
168 | num_layers = 1,
169 | msaa = False,
170 | background = None,
171 | antialias = True,
172 |
min_roughness = 0.08, 173 | return_rast_map = False, 174 | ): 175 | assert not (return_rast_map and num_layers > 1) 176 | 177 | def prepare_input_vector(x): 178 | x = torch.tensor(x, dtype=torch.float32, device='cuda') if not torch.is_tensor(x) else x 179 | return x[:, None, None, :] if len(x.shape) == 2 else x 180 | 181 | full_res = resolution*spp 182 | 183 | # Convert numpy arrays to torch tensors 184 | mtx_in = torch.tensor(mtx_in, dtype=torch.float32, device='cuda') if not torch.is_tensor(mtx_in) else mtx_in 185 | light_pos = prepare_input_vector(light_pos) 186 | light_power = prepare_input_vector(light_power) 187 | view_pos = prepare_input_vector(view_pos) 188 | 189 | # clip space transform 190 | v_pos_clip = ru.xfm_points(mesh.v_pos[None, ...], mtx_in) 191 | 192 | # Render all layers front-to-back 193 | layers = [] 194 | with dr.DepthPeeler(ctx, v_pos_clip, mesh.t_pos_idx.int(), [resolution*spp, resolution*spp]) as peeler: 195 | for _ in range(num_layers): 196 | rast, db = peeler.rasterize_next_layer() 197 | layers += [(render_layer(rast, db, mesh, view_pos, light_pos, light_power, resolution, min_roughness, spp, msaa), rast)] 198 | 199 | if return_rast_map: 200 | return rast.detach() 201 | 202 | # Clear to background layer 203 | if background is not None: 204 | assert background.shape[1] == resolution and background.shape[2] == resolution 205 | if spp > 1: 206 | background = util.scale_img_nhwc(background, [full_res, full_res], mag='nearest', min='nearest') 207 | accum_col = background 208 | else: 209 | accum_col = torch.zeros(size=(1, full_res, full_res, 3), dtype=torch.float32, device='cuda') 210 | 211 | # Composite BACK-TO-FRONT 212 | for color, rast in reversed(layers): 213 | alpha = (rast[..., -1:] > 0) * color[..., 3:4] 214 | accum_col = torch.lerp(accum_col, color[..., 0:3], alpha) 215 | if antialias: 216 | accum_col = dr.antialias(accum_col.contiguous(), rast, v_pos_clip, mesh.t_pos_idx.int()) # TODO: need to support bfloat16 217 | 218 | # Downscale to framebuffer resolution. Use avg pooling 219 | out = util.avg_pool_nhwc(accum_col, spp) if spp > 1 else accum_col 220 | 221 | return out 222 | 223 | 224 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .ops import xfm_points, xfm_vectors, image_loss, prepare_shading_normal, lambert, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith 10 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "prepare_shading_normal", "lambert", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ] 11 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import math 10 | import torch 11 | 12 | NORMAL_THRESHOLD = 0.1 13 | 14 | ################################################################################ 15 | # Vector utility functions 16 | ################################################################################ 17 | 18 | def _dot(x, y): 19 | return torch.sum(x*y, -1, keepdim=True) 20 | 21 | def _reflect(x, n): 22 | return 2*_dot(x, n)*n - x 23 | 24 | def _safe_normalize(x): 25 | return torch.nn.functional.normalize(x, dim = -1) 26 | 27 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading): 28 | # Swap normal direction for backfacing surfaces 29 | if two_sided_shading: 30 | smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm) 31 | geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm) 32 | 33 | t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1) 34 | return torch.lerp(geom_nrm, smooth_nrm, t) 35 | 36 | 37 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl): 38 | smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm)) 39 | if opengl: 40 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 41 | else: 42 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 43 | return _safe_normalize(shading_nrm) 44 | 45 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 46 | smooth_nrm = _safe_normalize(smooth_nrm) 47 | smooth_tng = _safe_normalize(smooth_tng) 48 | view_vec = _safe_normalize(view_pos - pos) 49 | shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl) 50 | return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading) 51 | 52 | ################################################################################ 53 | # Simple lambertian diffuse BSDF 54 | ################################################################################ 55 | 56 | def bsdf_lambert(nrm, wi): 57 | return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi 58 | 59 | ################################################################################ 60 | # Phong specular, loosely based on mitsuba implementation 61 | ################################################################################ 62 | 63 | def bsdf_phong(nrm, wo, wi, N): 64 | dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0) 65 | dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0) 66 | return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi) 67 | 68 | ################################################################################ 69 | # PBR's implementation of GGX specular 70 | ################################################################################ 71 | 72 | specular_epsilon = 1e-4 73 | 74 | def bsdf_fresnel_shlick(f0, f90, cosTheta): 75 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 76 | return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0 77 | 
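# The GGX (Trowbridge-Reitz) normal distribution below evaluates
# D(h) = alphaSqr / (pi * (cosTheta^2 * (alphaSqr - 1) + 1)^2),
# with cosTheta clamped away from 0 and 1 for numerical stability.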
78 | def bsdf_ndf_ggx(alphaSqr, cosTheta): 79 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 80 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1 81 | return alphaSqr / (d * d * math.pi) 82 | 83 | def bsdf_lambda_ggx(alphaSqr, cosTheta): 84 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 85 | cosThetaSqr = _cosTheta * _cosTheta 86 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr 87 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0) 88 | return res 89 | 90 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO): 91 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI) 92 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO) 93 | return 1 / (1 + lambdaI + lambdaO) 94 | 95 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08): 96 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0) 97 | alphaSqr = _alpha * _alpha 98 | 99 | h = _safe_normalize(wo + wi) 100 | woDotN = _dot(wo, nrm) 101 | wiDotN = _dot(wi, nrm) 102 | woDotH = _dot(wo, h) 103 | nDotH = _dot(nrm, h) 104 | 105 | D = bsdf_ndf_ggx(alphaSqr, nDotH) 106 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN) 107 | F = bsdf_fresnel_shlick(col, 1, woDotH) 108 | 109 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon) 110 | 111 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon) 112 | return torch.where(frontfacing, w, torch.zeros_like(w)) 113 | 114 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08): 115 | wo = _safe_normalize(view_pos - pos) 116 | wi = _safe_normalize(light_pos - pos) 117 | 118 | spec_str = arm[..., 0:1] # x component 119 | roughness = arm[..., 1:2] # y component 120 | metallic = arm[..., 2:3] # z component 121 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str) 122 | kd = kd * (1.0 - metallic) 123 | 124 | diffuse = kd * bsdf_lambert(nrm, wi) 125 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness) 126 | return diffuse + specular 127 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/bsdf.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
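// Parameter blocks for the BSDF CUDA kernels; each struct bundles the
// input/output tensors and launch size for one forward/backward op (see
// bsdf.cu and torch_bindings.cpp).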
8 |
9 | #pragma once
10 |
11 | #include "common.h"
12 |
13 | struct LambertKernelParams
14 | {
15 | Tensor nrm;
16 | Tensor wi;
17 | Tensor out;
18 | dim3 gridSize;
19 | };
20 |
21 | struct FresnelShlickKernelParams
22 | {
23 | Tensor f0;
24 | Tensor f90;
25 | Tensor cosTheta;
26 | Tensor out;
27 | dim3 gridSize;
28 | };
29 |
30 | struct NdfGGXParams
31 | {
32 | Tensor alphaSqr;
33 | Tensor cosTheta;
34 | Tensor out;
35 | dim3 gridSize;
36 | };
37 |
38 | struct MaskingSmithParams
39 | {
40 | Tensor alphaSqr;
41 | Tensor cosThetaI;
42 | Tensor cosThetaO;
43 | Tensor out;
44 | dim3 gridSize;
45 | };
46 |
47 | struct PbrSpecular
48 | {
49 | Tensor col;
50 | Tensor nrm;
51 | Tensor wo;
52 | Tensor wi;
53 | Tensor alpha;
54 | Tensor out;
55 | dim3 gridSize;
56 | float min_roughness;
57 | };
58 |
59 | struct PbrBSDF
60 | {
61 | Tensor kd;
62 | Tensor arm;
63 | Tensor pos;
64 | Tensor nrm;
65 | Tensor view_pos;
66 | Tensor light_pos;
67 | Tensor out;
68 | dim3 gridSize;
69 | float min_roughness;
70 | };
71 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/common.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include <cuda_runtime.h>
10 | #include <algorithm>
11 |
12 | //------------------------------------------------------------------------
13 | // Block and grid size calculators for kernel launches.
14 |
15 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims)
16 | {
17 | int maxThreads = maxWidth * maxHeight;
18 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1)
19 | return dim3(1, 1, 1); // Degenerate.
20 |
21 | // Start from max size.
22 | int bw = maxWidth;
23 | int bh = maxHeight;
24 |
25 | // Optimizations for weirdly sized buffers.
26 | if (dims.x < bw)
27 | {
28 | // Decrease block width to smallest power of two that covers the buffer width.
29 | while ((bw >> 1) >= dims.x)
30 | bw >>= 1;
31 |
32 | // Maximize height.
33 | bh = maxThreads / bw;
34 | if (bh > dims.y)
35 | bh = dims.y;
36 | }
37 | else if (dims.y < bh)
38 | {
39 | // Halve height and double width until fits completely inside buffer vertically.
40 | while (bh > dims.y)
41 | {
42 | bh >>= 1;
43 | if (bw < dims.x)
44 | bw <<= 1;
45 | }
46 | }
47 |
48 | // Done.
49 | return dim3(bw, bh, 1);
50 | }
51 |
52 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g.
__shfl_xor_sync)
53 | dim3 getWarpSize(dim3 blockSize)
54 | {
55 | return dim3(
56 | std::min(blockSize.x, 32u),
57 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)),
58 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z))
59 | );
60 | }
61 |
62 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims)
63 | {
64 | dim3 gridSize;
65 | gridSize.x = (dims.x - 1) / blockSize.x + 1;
66 | gridSize.y = (dims.y - 1) / blockSize.y + 1;
67 | gridSize.z = (dims.z - 1) / blockSize.z + 1;
68 | return gridSize;
69 | }
70 |
71 | //------------------------------------------------------------------------
72 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/common.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include <cuda.h>
11 | #include <stdint.h>
12 |
13 | #include "vec3f.h"
14 | #include "vec4f.h"
15 | #include "tensor.h"
16 |
17 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims);
18 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims);
19 |
20 | #ifdef __CUDACC__
21 |
22 | #ifdef _MSC_VER
23 | #define M_PI 3.14159265358979323846f
24 | #endif
25 |
26 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
27 | {
28 | return dim3(
29 | min(blockSize.x, 32u),
30 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)),
31 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z))
32 | );
33 | }
34 |
35 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); }
36 | #else
37 | dim3 getWarpSize(dim3 blockSize);
38 | #endif
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/loss.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include <cuda.h>
10 |
11 | #include "common.h"
12 | #include "loss.h"
13 |
14 | //------------------------------------------------------------------------
15 | // Utils
16 |
17 | __device__ inline float bwdAbs(float x) { return x == 0.0f ? 0.0f : x < 0.0f ? -1.0f : 1.0f; }
18 |
19 | __device__ float warpSum(float val) {
20 | for (int i = 1; i < 32; i *= 2)
21 | val += __shfl_xor_sync(0xFFFFFFFF, val, i);
22 | return val;
23 | }
24 |
25 | //------------------------------------------------------------------------
26 | // Tonemapping
27 |
28 | __device__ inline float fwdSRGB(float x)
29 | {
30 | return x > 0.0031308f ?
powf(max(x, 0.0031308f), 1.0f / 2.4f) * 1.055f - 0.055f : 12.92f * max(x, 0.0f); 31 | } 32 | 33 | __device__ inline void bwdSRGB(float x, float &d_x, float d_out) 34 | { 35 | if (x > 0.0031308f) 36 | d_x += d_out * 0.439583f / powf(x, 0.583333f); 37 | else if (x > 0.0f) 38 | d_x += d_out * 12.92f; 39 | } 40 | 41 | __device__ inline vec3f fwdTonemapLogSRGB(vec3f x) 42 | { 43 | return vec3f(fwdSRGB(logf(x.x + 1.0f)), fwdSRGB(logf(x.y + 1.0f)), fwdSRGB(logf(x.z + 1.0f))); 44 | } 45 | 46 | __device__ inline void bwdTonemapLogSRGB(vec3f x, vec3f& d_x, vec3f d_out) 47 | { 48 | if (x.x > 0.0f && x.x < 65535.0f) 49 | { 50 | bwdSRGB(logf(x.x + 1.0f), d_x.x, d_out.x); 51 | d_x.x *= 1 / (x.x + 1.0f); 52 | } 53 | if (x.y > 0.0f && x.y < 65535.0f) 54 | { 55 | bwdSRGB(logf(x.y + 1.0f), d_x.y, d_out.y); 56 | d_x.y *= 1 / (x.y + 1.0f); 57 | } 58 | if (x.z > 0.0f && x.z < 65535.0f) 59 | { 60 | bwdSRGB(logf(x.z + 1.0f), d_x.z, d_out.z); 61 | d_x.z *= 1 / (x.z + 1.0f); 62 | } 63 | } 64 | 65 | __device__ inline float fwdRELMSE(float img, float target, float eps = 0.1f) 66 | { 67 | return (img - target) * (img - target) / (img * img + target * target + eps); 68 | } 69 | 70 | __device__ inline void bwdRELMSE(float img, float target, float &d_img, float &d_target, float d_out, float eps = 0.1f) 71 | { 72 | float denom = (target * target + img * img + eps); 73 | d_img += d_out * 2 * (img - target) * (target * (target + img) + eps) / (denom * denom); 74 | d_target -= d_out * 2 * (img - target) * (img * (target + img) + eps) / (denom * denom); 75 | } 76 | 77 | __device__ inline float fwdSMAPE(float img, float target, float eps=0.01f) 78 | { 79 | return abs(img - target) / (img + target + eps); 80 | } 81 | 82 | __device__ inline void bwdSMAPE(float img, float target, float& d_img, float& d_target, float d_out, float eps = 0.01f) 83 | { 84 | float denom = (target + img + eps); 85 | d_img += d_out * bwdAbs(img - target) * (2 * target + eps) / (denom * denom); 86 | d_target -= d_out * bwdAbs(img - target) * (2 * img + eps) / (denom * denom); 87 | } 88 | 89 | //------------------------------------------------------------------------ 90 | // Kernels 91 | 92 | __global__ void imgLossFwdKernel(LossKernelParams p) 93 | { 94 | // Calculate pixel position. 
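// One thread evaluates one pixel below; the per-pixel losses are then
// reduced with warpSum so that a single lane per warp writes the partial
// sum to the warp-downsampled output tensor.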
95 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
96 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
97 | unsigned int pz = blockIdx.z;
98 |
99 | float floss = 0.0f;
100 | if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z)
101 | {
102 | vec3f img = p.img.fetch3(px, py, pz);
103 | vec3f target = p.target.fetch3(px, py, pz);
104 |
105 | img = vec3f(clamp(img.x, 0.0f, 65535.0f), clamp(img.y, 0.0f, 65535.0f), clamp(img.z, 0.0f, 65535.0f));
106 | target = vec3f(clamp(target.x, 0.0f, 65535.0f), clamp(target.y, 0.0f, 65535.0f), clamp(target.z, 0.0f, 65535.0f));
107 |
108 | if (p.tonemapper == TONEMAPPER_LOG_SRGB)
109 | {
110 | img = fwdTonemapLogSRGB(img);
111 | target = fwdTonemapLogSRGB(target);
112 | }
113 |
114 | vec3f vloss(0);
115 | if (p.loss == LOSS_MSE)
116 | vloss = (img - target) * (img - target);
117 | else if (p.loss == LOSS_RELMSE)
118 | vloss = vec3f(fwdRELMSE(img.x, target.x), fwdRELMSE(img.y, target.y), fwdRELMSE(img.z, target.z));
119 | else if (p.loss == LOSS_SMAPE)
120 | vloss = vec3f(fwdSMAPE(img.x, target.x), fwdSMAPE(img.y, target.y), fwdSMAPE(img.z, target.z));
121 | else
122 | vloss = vec3f(abs(img.x - target.x), abs(img.y - target.y), abs(img.z - target.z));
123 |
124 | floss = sum(vloss) / 3.0f;
125 | }
126 |
127 | floss = warpSum(floss);
128 |
129 | dim3 warpSize = getWarpSize(blockDim);
130 | if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z && threadIdx.x % warpSize.x == 0 && threadIdx.y % warpSize.y == 0 && threadIdx.z % warpSize.z == 0)
131 | p.out.store(px / warpSize.x, py / warpSize.y, pz / warpSize.z, floss);
132 | }
133 |
134 | __global__ void imgLossBwdKernel(LossKernelParams p)
135 | {
136 | // Calculate pixel position.
137 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
138 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
139 | unsigned int pz = blockIdx.z;
140 |
141 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
142 | return;
143 |
144 | dim3 warpSize = getWarpSize(blockDim);
145 |
146 | vec3f _img = p.img.fetch3(px, py, pz);
147 | vec3f _target = p.target.fetch3(px, py, pz);
148 | float d_out = p.out.fetch1(px / warpSize.x, py / warpSize.y, pz / warpSize.z);
149 |
150 | /////////////////////////////////////////////////////////////////////
151 | // FWD
152 |
153 | vec3f img = _img, target = _target;
154 | if (p.tonemapper == TONEMAPPER_LOG_SRGB)
155 | {
156 | img = fwdTonemapLogSRGB(img);
157 | target = fwdTonemapLogSRGB(target);
158 | }
159 |
160 | /////////////////////////////////////////////////////////////////////
161 | // BWD
162 |
163 | vec3f d_vloss = vec3f(d_out, d_out, d_out) / 3.0f;
164 |
165 | vec3f d_img(0), d_target(0);
166 | if (p.loss == LOSS_MSE)
167 | {
168 | d_img = vec3f(d_vloss.x * 2 * (img.x - target.x), d_vloss.y * 2 * (img.y - target.y), d_vloss.z * 2 * (img.z - target.z));
169 | d_target = -d_img;
170 | }
171 | else if (p.loss == LOSS_RELMSE)
172 | {
173 | bwdRELMSE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
174 | bwdRELMSE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
175 | bwdRELMSE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
176 | }
177 | else if (p.loss == LOSS_SMAPE)
178 | {
179 | bwdSMAPE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
180 | bwdSMAPE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
181 | bwdSMAPE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
182 | }
183 | else
184 | {
185 | d_img = d_vloss * vec3f(bwdAbs(img.x - target.x), bwdAbs(img.y - target.y), bwdAbs(img.z - target.z));
186 | d_target =
-d_img; 187 | } 188 | 189 | 190 | if (p.tonemapper == TONEMAPPER_LOG_SRGB) 191 | { 192 | vec3f d__img(0), d__target(0); 193 | bwdTonemapLogSRGB(_img, d__img, d_img); 194 | bwdTonemapLogSRGB(_target, d__target, d_target); 195 | d_img = d__img; d_target = d__target; 196 | } 197 | 198 | if (_img.x <= 0.0f || _img.x >= 65535.0f) d_img.x = 0; 199 | if (_img.y <= 0.0f || _img.y >= 65535.0f) d_img.y = 0; 200 | if (_img.z <= 0.0f || _img.z >= 65535.0f) d_img.z = 0; 201 | if (_target.x <= 0.0f || _target.x >= 65535.0f) d_target.x = 0; 202 | if (_target.y <= 0.0f || _target.y >= 65535.0f) d_target.y = 0; 203 | if (_target.z <= 0.0f || _target.z >= 65535.0f) d_target.z = 0; 204 | 205 | p.img.store_grad(px, py, pz, d_img); 206 | p.target.store_grad(px, py, pz, d_target); 207 | } -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/loss.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include "common.h" 12 | 13 | enum TonemapperType 14 | { 15 | TONEMAPPER_NONE = 0, 16 | TONEMAPPER_LOG_SRGB = 1 17 | }; 18 | 19 | enum LossType 20 | { 21 | LOSS_L1 = 0, 22 | LOSS_MSE = 1, 23 | LOSS_RELMSE = 2, 24 | LOSS_SMAPE = 3 25 | }; 26 | 27 | struct LossKernelParams 28 | { 29 | Tensor img; 30 | Tensor target; 31 | Tensor out; 32 | dim3 gridSize; 33 | TonemapperType tonemapper; 34 | LossType loss; 35 | }; 36 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/mesh.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
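// Batched 4x4 transform kernels: xfmPointsFwdKernel stages the matrix in
// shared memory once per block and transforms one point per thread, while
// xfmPointsBwdKernel applies the transposed matrix to the output gradients.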
8 |
9 | #include <cuda.h>
10 |
11 | #include "common.h"
12 | #include "mesh.h"
13 |
14 |
15 | //------------------------------------------------------------------------
16 | // Kernels
17 |
18 | __global__ void xfmPointsFwdKernel(XfmKernelParams p)
19 | {
20 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
21 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
22 |
23 | __shared__ float mtx[4][4];
24 | if (threadIdx.x < 16)
25 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
26 | __syncthreads();
27 |
28 | if (px >= p.gridSize.x)
29 | return;
30 |
31 | vec3f pos(
32 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
33 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
34 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
35 | );
36 |
37 | if (p.isPoints)
38 | {
39 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]);
40 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]);
41 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]);
42 | p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]);
43 | }
44 | else
45 | {
46 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]);
47 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]);
48 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]);
49 | }
50 | }
51 |
52 | __global__ void xfmPointsBwdKernel(XfmKernelParams p)
53 | {
54 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
55 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
56 |
57 | __shared__ float mtx[4][4];
58 | if (threadIdx.x < 16)
59 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
60 | __syncthreads();
61 |
62 | if (px >= p.gridSize.x)
63 | return;
64 |
65 | vec3f pos(
66 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
67 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
68 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
69 | );
70 |
71 | vec4f d_out(
72 | p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
73 | p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
74 | p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
75 | p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0))
76 | );
77 |
78 | if (p.isPoints)
79 | {
80 | p.points.store_grad(p.points._nhwcIndex(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
81 | p.points.store_grad(p.points._nhwcIndex(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
82 | p.points.store_grad(p.points._nhwcIndex(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
83 | }
84 | else
85 | {
86 | p.points.store_grad(p.points._nhwcIndex(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
87 | p.points.store_grad(p.points._nhwcIndex(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
88 | p.points.store_grad(p.points._nhwcIndex(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
89 | }
90 | }
--------------------------------------------------------------------------------
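For reference, here is a minimal PyTorch sketch of what the forward transform above computes (an illustration only; xfm_points_reference is a hypothetical name, and the real op exposed through renderutils also provides the fused backward pass):

import torch

def xfm_points_reference(points, matrix):
    # points: [N, V, 3], matrix: [N, 4, 4] -> homogeneous output [N, V, 4].
    # Pads each point with w = 1 and computes out = M @ [p; 1] per point,
    # matching the row/column order used by xfmPointsFwdKernel above.
    points_h = torch.nn.functional.pad(points, (0, 1), value=1.0)
    return torch.matmul(points_h, matrix.transpose(1, 2))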
/nvdiffmodeling/src/renderutils/c_src/mesh.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include "common.h" 12 | 13 | struct XfmKernelParams 14 | { 15 | bool isPoints; 16 | Tensor points; 17 | Tensor matrix; 18 | Tensor out; 19 | dim3 gridSize; 20 | }; 21 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/normal.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "common.h" 10 | #include "normal.h" 11 | 12 | #define NORMAL_THRESHOLD 0.1f 13 | 14 | //------------------------------------------------------------------------ 15 | // Perturb shading normal by tangent frame 16 | 17 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl) 18 | { 19 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 20 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 21 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 22 | return safeNormalize(_shading_nrm); 23 | } 24 | 25 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl) 26 | { 27 | //////////////////////////////////////////////////////////////////////// 28 | // FWD 29 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 30 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 31 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 32 | 33 | //////////////////////////////////////////////////////////////////////// 34 | // BWD 35 | vec3f d_shading_nrm(0); 36 | bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out); 37 | 38 | vec3f d_smooth_bitng(0); 39 | 40 | if (perturbed_nrm.z > 0.0f) 41 | { 42 | d_smooth_nrm += d_shading_nrm * perturbed_nrm.z; 43 | d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm); 44 | } 45 | 46 | d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y; 47 | d_perturbed_nrm.y += (opengl ? 
-1 : 1) * sum(d_shading_nrm * smooth_bitng); 48 | 49 | d_smooth_tng += d_shading_nrm * perturbed_nrm.x; 50 | d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng); 51 | 52 | vec3f d__smooth_bitng(0); 53 | bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng); 54 | 55 | bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng); 56 | } 57 | 58 | //------------------------------------------------------------------------ 59 | #define bent_nrm_eps 0.001f 60 | 61 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm) 62 | { 63 | float dp = dot(view_vec, smooth_nrm); 64 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 65 | return geom_nrm * (1.0f - t) + smooth_nrm * t; 66 | } 67 | 68 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out) 69 | { 70 | //////////////////////////////////////////////////////////////////////// 71 | // FWD 72 | float dp = dot(view_vec, smooth_nrm); 73 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 74 | 75 | //////////////////////////////////////////////////////////////////////// 76 | // BWD 77 | if (dp > NORMAL_THRESHOLD) 78 | d_smooth_nrm += d_out; 79 | else 80 | { 81 | // geom_nrm * (1.0f - t) + smooth_nrm * t; 82 | d_geom_nrm += d_out * (1.0f - t); 83 | d_smooth_nrm += d_out * t; 84 | float d_t = sum(d_out * (smooth_nrm - geom_nrm)); 85 | 86 | float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD; 87 | 88 | bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp); 89 | } 90 | } 91 | 92 | //------------------------------------------------------------------------ 93 | // Kernels 94 | 95 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p) 96 | { 97 | // Calculate pixel position. 98 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 99 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 100 | unsigned int pz = blockIdx.z; 101 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 102 | return; 103 | 104 | vec3f pos = p.pos.fetch3(px, py, pz); 105 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 106 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 107 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 108 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 109 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 110 | 111 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 112 | vec3f smooth_tng = safeNormalize(_smooth_tng); 113 | vec3f view_vec = safeNormalize(view_pos - pos); 114 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 115 | 116 | vec3f res; 117 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 118 | res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm); 119 | else 120 | res = fwdBendNormal(view_vec, shading_nrm, geom_nrm); 121 | 122 | p.out.store(px, py, pz, res); 123 | } 124 | 125 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p) 126 | { 127 | // Calculate pixel position. 
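// The backward kernel first re-runs the forward computations it needs,
// then propagates d_out through the bend / perturb / normalize chain in
// reverse order, accumulating into the per-input gradient tensors.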
128 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 129 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 130 | unsigned int pz = blockIdx.z; 131 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 132 | return; 133 | 134 | vec3f pos = p.pos.fetch3(px, py, pz); 135 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 136 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 137 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 138 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 139 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 140 | vec3f d_out = p.out.fetch3(px, py, pz); 141 | 142 | /////////////////////////////////////////////////////////////////////////////////////////////////// 143 | // FWD 144 | 145 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 146 | vec3f smooth_tng = safeNormalize(_smooth_tng); 147 | vec3f _view_vec = view_pos - pos; 148 | vec3f view_vec = safeNormalize(view_pos - pos); 149 | 150 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 151 | 152 | /////////////////////////////////////////////////////////////////////////////////////////////////// 153 | // BWD 154 | 155 | vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0); 156 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 157 | { 158 | bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 159 | d_shading_nrm = -d_shading_nrm; 160 | d_geom_nrm = -d_geom_nrm; 161 | } 162 | else 163 | bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 164 | 165 | vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0); 166 | bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl); 167 | 168 | vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0); 169 | bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec); 170 | bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm); 171 | bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng); 172 | 173 | p.pos.store_grad(px, py, pz, -d__view_vec); 174 | p.view_pos.store_grad(px, py, pz, d__view_vec); 175 | p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm); 176 | p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm); 177 | p.smooth_tng.store_grad(px, py, pz, d__smooth_tng); 178 | p.geom_nrm.store_grad(px, py, pz, d_geom_nrm); 179 | } -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/normal.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
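// Parameter block for the PrepareShadingNormal forward/backward kernels
// defined in normal.cu.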
8 |
9 | #pragma once
10 |
11 | #include "common.h"
12 |
13 | struct PrepareShadingNormalKernelParams
14 | {
15 | Tensor pos;
16 | Tensor view_pos;
17 | Tensor perturbed_nrm;
18 | Tensor smooth_nrm;
19 | Tensor smooth_tng;
20 | Tensor geom_nrm;
21 | Tensor out;
22 | dim3 gridSize;
23 | bool two_sided_shading, opengl;
24 | };
25 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/tensor.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #if defined(__CUDACC__) && defined(BFLOAT16)
11 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits
12 | #endif
13 |
14 | //---------------------------------------------------------------------------------
15 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16
16 |
17 | struct Tensor
18 | {
19 | void* val;
20 | void* d_val;
21 | int dims[4];
22 | int strides[4];
23 | bool fp16;
24 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {}
25 |
26 | #ifdef __CUDACC__
27 | // Helpers to index and read/write a single element
28 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; }
29 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); }
30 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * dims[1] + h) * dims[2] + w) * dims[3] + c; }
31 | #ifdef BFLOAT16
32 | __device__ inline float fetch(unsigned int idx) const { return fp16 ?
__bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; }
33 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; }
34 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; }
35 | #else
36 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; }
37 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; }
38 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; }
39 | #endif
40 |
41 | //////////////////////////////////////////////////////////////////////////////////////////
42 | // Fetch, use broadcasting for tensor dimensions of size 1
43 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const
44 | {
45 | return fetch(nhwcIndex(z, y, x, 0));
46 | }
47 |
48 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const
49 | {
50 | return vec3f(
51 | fetch(nhwcIndex(z, y, x, 0)),
52 | fetch(nhwcIndex(z, y, x, 1)),
53 | fetch(nhwcIndex(z, y, x, 2))
54 | );
55 | }
56 |
57 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
58 | // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
59 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val)
60 | {
61 | store(_nhwcIndex(z, y, x, 0), _val);
62 | }
63 |
64 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
65 | {
66 | store(_nhwcIndex(z, y, x, 0), _val.x);
67 | store(_nhwcIndex(z, y, x, 1), _val.y);
68 | store(_nhwcIndex(z, y, x, 2), _val.z);
69 | }
70 |
71 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
72 | // Store gradient, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
73 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val)
74 | {
75 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val);
76 | }
77 |
78 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
79 | {
80 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x);
81 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y);
82 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z);
83 | }
84 | #endif
85 |
86 | };
87 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/vec3f.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
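// Minimal device-side 3-vector type with forward helpers (dot, cross,
// reflect, safeNormalize) and matching bwd* functions that accumulate
// gradients for the backward passes.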
8 | 9 | #pragma once 10 | 11 | struct vec3f 12 | { 13 | float x, y, z; 14 | 15 | #ifdef __CUDACC__ 16 | __device__ vec3f() { } 17 | __device__ vec3f(float v) { x = v; y = v; z = v; } 18 | __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 19 | __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; } 20 | 21 | __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; } 22 | __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; } 23 | __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; } 24 | __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; } 25 | #endif 26 | }; 27 | 28 | #ifdef __CUDACC__ 29 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } 30 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } 31 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } 32 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); } 33 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); } 34 | 35 | __device__ static inline float sum(vec3f a) 36 | { 37 | return a.x + a.y + a.z; 38 | } 39 | 40 | __device__ static inline vec3f cross(vec3f a, vec3f b) 41 | { 42 | vec3f out; 43 | out.x = a.y * b.z - a.z * b.y; 44 | out.y = a.z * b.x - a.x * b.z; 45 | out.z = a.x * b.y - a.y * b.x; 46 | return out; 47 | } 48 | 49 | __device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out) 50 | { 51 | d_a.x += d_out.z * b.y - d_out.y * b.z; 52 | d_a.y += d_out.x * b.z - d_out.z * b.x; 53 | d_a.z += d_out.y * b.x - d_out.x * b.y; 54 | 55 | d_b.x += d_out.y * a.z - d_out.z * a.y; 56 | d_b.y += d_out.z * a.x - d_out.x * a.z; 57 | d_b.z += d_out.x * a.y - d_out.y * a.x; 58 | } 59 | 60 | __device__ static inline float dot(vec3f a, vec3f b) 61 | { 62 | return a.x * b.x + a.y * b.y + a.z * b.z; 63 | } 64 | 65 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out) 66 | { 67 | d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z; 68 | d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z; 69 | } 70 | 71 | __device__ static inline vec3f reflect(vec3f x, vec3f n) 72 | { 73 | return n * 2.0f * dot(n, x) - x; 74 | } 75 | 76 | __device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out) 77 | { 78 | d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z); 79 | d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z); 80 | d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1); 81 | 82 | d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x); 83 | d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y); 84 | d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z)); 85 | } 86 | 87 | __device__ static inline vec3f safeNormalize(vec3f v) 88 | { 89 | float l = sqrtf(v.x * v.x + v.y * v.y + 
v.z * v.z); 90 | return l > 0.0f ? (v / l) : vec3f(0.0f); 91 | } 92 | 93 | __device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out) 94 | { 95 | 96 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 97 | if (l > 0.0f) 98 | { 99 | float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f); 100 | d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac; 101 | d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac; 102 | d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac; 103 | } 104 | } 105 | 106 | #endif -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/vec4f.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | struct vec4f 12 | { 13 | float x, y, z, w; 14 | 15 | #ifdef __CUDACC__ 16 | __device__ vec4f() { } 17 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; } 18 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } 19 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; } 20 | #endif 21 | }; 22 | 23 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
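For reference, the two relative losses defined below compute SMAPE(I, T) = mean(|I − T| / (|I| + |T| + ε)) and RELMSE(I, T) = mean((I − T)² / (I² + T² + ε)), with ε taken from each function's eps argument; normalizing by the local signal magnitude keeps bright HDR pixels from dominating the mean.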
8 | 9 | import torch 10 | 11 | #---------------------------------------------------------------------------- 12 | # HDR image losses 13 | #---------------------------------------------------------------------------- 14 | 15 | def _tonemap_srgb(f): 16 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 17 | 18 | def _SMAPE(img, target, eps=0.01): 19 | nom = torch.abs(img - target) 20 | denom = torch.abs(img) + torch.abs(target) + eps 21 | return torch.mean(nom / denom) 22 | 23 | def _RELMSE(img, target, eps=0.1): 24 | nom = (img - target) * (img - target) 25 | denom = img * img + target * target + eps 26 | return torch.mean(nom / denom) 27 | 28 | def image_loss_fn(img, target, loss, tonemapper): 29 | if tonemapper == 'log_srgb': 30 | img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1)) 31 | target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1)) 32 | 33 | if loss == 'mse': 34 | return torch.nn.functional.mse_loss(img, target) 35 | elif loss == 'smape': 36 | return _SMAPE(img, target) 37 | elif loss == 'relmse': 38 | return _RELMSE(img, target) 39 | else: 40 | return torch.nn.functional.l1_loss(img, target) 41 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import os 11 | import sys 12 | import torch 13 | import torch.utils.cpp_extension 14 | 15 | from .bsdf import * 16 | from .loss import * 17 | 18 | #---------------------------------------------------------------------------- 19 | # C++/Cuda plugin compiler/loader. 20 | 21 | _plugin = None 22 | if _plugin is None: 23 | 24 | # Make sure we can find the necessary compiler and library binaries. 25 | if os.name == 'nt': 26 | def find_cl_path(): 27 | import glob 28 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']: 29 | paths = sorted(glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition), reverse=True) 30 | if paths: 31 | return paths[0] 32 | 33 | # If cl.exe is not on path, try to find it. 34 | if os.system("where cl.exe >nul 2>nul") != 0: 35 | cl_path = find_cl_path() 36 | if cl_path is None: 37 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 38 | os.environ['PATH'] += ';' + cl_path 39 | 40 | # Linker options. 41 | if os.name == 'posix': 42 | ldflags = ['-lcuda'] 43 | elif os.name == 'nt': 44 | ldflags = ['/DEFAULTLIB:cuda'] 45 | 46 | # List of sources. 47 | source_files = [ 48 | 'c_src/mesh.cu', 49 | 'c_src/loss.cu', 50 | 'c_src/bsdf.cu', 51 | 'c_src/normal.cu', 52 | 'c_src/common.cpp', 53 | 'c_src/torch_bindings.cpp' 54 | ] 55 | 56 | # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine. 57 | os.environ['TORCH_CUDA_ARCH_LIST'] = '' 58 | 59 | # Compile and load.
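# torch.utils.cpp_extension.load() below JIT-compiles the listed C++/CUDA
# sources into an extension module the first time it runs (subsequent runs
# reuse the cached build) and registers it in sys.modules, which is what
# lets the plain `import renderutils_plugin` afterwards succeed.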
60 | source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files] 61 | torch.utils.cpp_extension.load(name='renderutils_plugin', sources=source_paths, extra_ldflags=ldflags, with_cuda=True, verbose=True) 62 | 63 | # Import, cache, and return the compiled module. 64 | import renderutils_plugin 65 | _plugin = renderutils_plugin 66 | 67 | #---------------------------------------------------------------------------- 68 | # Internal kernels, just used for testing functionality 69 | 70 | class _fresnel_shlick_func(torch.autograd.Function): 71 | @staticmethod 72 | def forward(ctx, f0, f90, cosTheta): 73 | out = _plugin.fresnel_shlick_fwd(f0, f90, cosTheta, False) 74 | ctx.save_for_backward(f0, f90, cosTheta) 75 | return out 76 | 77 | @staticmethod 78 | def backward(ctx, dout): 79 | f0, f90, cosTheta = ctx.saved_variables 80 | return _plugin.fresnel_shlick_bwd(f0, f90, cosTheta, dout) + (None,) 81 | 82 | def _fresnel_shlick(f0, f90, cosTheta, use_python=False): 83 | if use_python: 84 | out = bsdf_fresnel_shlick(f0, f90, cosTheta) 85 | else: 86 | out = _fresnel_shlick_func.apply(f0, f90, cosTheta) 87 | 88 | if torch.is_anomaly_enabled(): 89 | assert torch.all(torch.isfinite(out)), "Output of _fresnel_shlick contains inf or NaN" 90 | return out 91 | 92 | 93 | class _ndf_ggx_func(torch.autograd.Function): 94 | @staticmethod 95 | def forward(ctx, alphaSqr, cosTheta): 96 | out = _plugin.ndf_ggx_fwd(alphaSqr, cosTheta, False) 97 | ctx.save_for_backward(alphaSqr, cosTheta) 98 | return out 99 | 100 | @staticmethod 101 | def backward(ctx, dout): 102 | alphaSqr, cosTheta = ctx.saved_variables 103 | return _plugin.ndf_ggx_bwd(alphaSqr, cosTheta, dout) + (None,) 104 | 105 | def _ndf_ggx(alphaSqr, cosTheta, use_python=False): 106 | if use_python: 107 | out = bsdf_ndf_ggx(alphaSqr, cosTheta) 108 | else: 109 | out = _ndf_ggx_func.apply(alphaSqr, cosTheta) 110 | 111 | if torch.is_anomaly_enabled(): 112 | assert torch.all(torch.isfinite(out)), "Output of _ndf_ggx contains inf or NaN" 113 | return out 114 | 115 | class _lambda_ggx_func(torch.autograd.Function): 116 | @staticmethod 117 | def forward(ctx, alphaSqr, cosTheta): 118 | out = _plugin.lambda_ggx_fwd(alphaSqr, cosTheta, False) 119 | ctx.save_for_backward(alphaSqr, cosTheta) 120 | return out 121 | 122 | @staticmethod 123 | def backward(ctx, dout): 124 | alphaSqr, cosTheta = ctx.saved_variables 125 | return _plugin.lambda_ggx_bwd(alphaSqr, cosTheta, dout) + (None,) 126 | 127 | def _lambda_ggx(alphaSqr, cosTheta, use_python=False): 128 | if use_python: 129 | out = bsdf_lambda_ggx(alphaSqr, cosTheta) 130 | else: 131 | out = _lambda_ggx_func.apply(alphaSqr, cosTheta) 132 | 133 | if torch.is_anomaly_enabled(): 134 | assert torch.all(torch.isfinite(out)), "Output of _lambda_ggx contains inf or NaN" 135 | return out 136 | 137 | class _masking_smith_func(torch.autograd.Function): 138 | @staticmethod 139 | def forward(ctx, alphaSqr, cosThetaI, cosThetaO): 140 | ctx.save_for_backward(alphaSqr, cosThetaI, cosThetaO) 141 | out = _plugin.masking_smith_fwd(alphaSqr, cosThetaI, cosThetaO, False) 142 | return out 143 | 144 | @staticmethod 145 | def backward(ctx, dout): 146 | alphaSqr, cosThetaI, cosThetaO = ctx.saved_variables 147 | return _plugin.masking_smith_bwd(alphaSqr, cosThetaI, cosThetaO, dout) + (None,) 148 | 149 | def _masking_smith(alphaSqr, cosThetaI, cosThetaO, use_python=False): 150 | if use_python: 151 | out = bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO) 152 | else: 153 | out = _masking_smith_func.apply(alphaSqr, 
cosThetaI, cosThetaO) 154 | 155 | if torch.is_anomaly_enabled(): 156 | assert torch.all(torch.isfinite(out)), "Output of _masking_smith contains inf or NaN" 157 | return out 158 | 159 | #---------------------------------------------------------------------------- 160 | # Shading normal setup (bump mapping + bent normals) 161 | 162 | class _prepare_shading_normal_func(torch.autograd.Function): 163 | @staticmethod 164 | def forward(ctx, pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 165 | ctx.two_sided_shading, ctx.opengl = two_sided_shading, opengl 166 | out = _plugin.prepare_shading_normal_fwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl, False) 167 | ctx.save_for_backward(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm) 168 | return out 169 | 170 | @staticmethod 171 | def backward(ctx, dout): 172 | pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm = ctx.saved_variables 173 | return _plugin.prepare_shading_normal_bwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, dout, ctx.two_sided_shading, ctx.opengl) + (None, None, None) 174 | 175 | def prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading=True, opengl=True, use_python=False): 176 | '''Takes care of all corner cases and produces a final normal used for shading: 177 | - Constructs tangent space 178 | - Flips normal direction based on geometric normal for two-sided shading 179 | - Perturbs shading normal by normal map 180 | - Bends backfacing normals towards the camera to avoid shading artifacts 181 | 182 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent. 183 | 184 | Args: 185 | pos: World space g-buffer position. 186 | view_pos: Camera position in world space (typically using broadcasting). 187 | perturbed_nrm: Tangent-space normal perturbation from normal map lookup. 188 | smooth_nrm: Interpolated vertex normals. 189 | smooth_tng: Interpolated vertex tangents. 190 | geom_nrm: Geometric (face) normals. 191 | two_sided_shading: Use one/two sided shading 192 | opengl: Use OpenGL/DirectX normal map conventions 193 | use_python: Use PyTorch implementation (for validation) 194 | Returns: 195 | Final shading normal 196 | ''' 197 | 198 | if perturbed_nrm is None: 199 | perturbed_nrm = torch.tensor([0, 0, 1], dtype=torch.float32, device='cuda', requires_grad=False)[None, None, None, ...] 200 | 201 | if use_python: 202 | out = bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl) 203 | else: 204 | out = _prepare_shading_normal_func.apply(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl) 205 | 206 | if torch.is_anomaly_enabled(): 207 | assert torch.all(torch.isfinite(out)), "Output of prepare_shading_normal contains inf or NaN" 208 | return out 209 | 210 | #---------------------------------------------------------------------------- 211 | # BSDF functions 212 | 213 | class _lambert_func(torch.autograd.Function): 214 | @staticmethod 215 | def forward(ctx, nrm, wi): 216 | out = _plugin.lambert_fwd(nrm, wi, False) 217 | ctx.save_for_backward(nrm, wi) 218 | return out 219 | 220 | @staticmethod 221 | def backward(ctx, dout): 222 | nrm, wi = ctx.saved_variables 223 | return _plugin.lambert_bwd(nrm, wi, dout) + (None,) 224 | 225 | def lambert(nrm, wi, use_python=False): 226 | '''Lambertian bsdf.
227 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent. 228 | 229 | Args: 230 | nrm: World space shading normal. 231 | wi: World space light vector. 232 | use_python: Use PyTorch implementation (for validation) 233 | 234 | Returns: 235 | Shaded diffuse value with shape [minibatch_size, height, width, 1] 236 | ''' 237 | 238 | if use_python: 239 | out = bsdf_lambert(nrm, wi) 240 | else: 241 | out = _lambert_func.apply(nrm, wi) 242 | 243 | if torch.is_anomaly_enabled(): 244 | assert torch.all(torch.isfinite(out)), "Output of lambert contains inf or NaN" 245 | return out 246 | 247 | class _pbr_specular_func(torch.autograd.Function): 248 | @staticmethod 249 | def forward(ctx, col, nrm, wo, wi, alpha, min_roughness): 250 | ctx.save_for_backward(col, nrm, wo, wi, alpha) 251 | ctx.min_roughness = min_roughness 252 | out = _plugin.pbr_specular_fwd(col, nrm, wo, wi, alpha, min_roughness, False) 253 | return out 254 | 255 | @staticmethod 256 | def backward(ctx, dout): 257 | col, nrm, wo, wi, alpha = ctx.saved_variables 258 | return _plugin.pbr_specular_bwd(col, nrm, wo, wi, alpha, ctx.min_roughness, dout) + (None, None) 259 | 260 | def pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08, use_python=False): 261 | '''Physically-based specular bsdf. 262 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 263 | 264 | Args: 265 | col: Specular lobe color 266 | nrm: World space shading normal. 267 | wo: World space camera vector. 268 | wi: World space light vector 269 | alpha: Specular roughness parameter with shape [minibatch_size, height, width, 1] 270 | min_roughness: Scalar roughness clamping threshold 271 | 272 | use_python: Use PyTorch implementation (for validation) 273 | Returns: 274 | Shaded specular color 275 | ''' 276 | 277 | if use_python: 278 | out = bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=min_roughness) 279 | else: 280 | out = _pbr_specular_func.apply(col, nrm, wo, wi, alpha, min_roughness) 281 | 282 | if torch.is_anomaly_enabled(): 283 | assert torch.all(torch.isfinite(out)), "Output of pbr_specular contains inf or NaN" 284 | return out 285 | 286 | class _pbr_bsdf_func(torch.autograd.Function): 287 | @staticmethod 288 | def forward(ctx, kd, arm, pos, nrm, view_pos, light_pos, min_roughness): 289 | ctx.save_for_backward(kd, arm, pos, nrm, view_pos, light_pos) 290 | ctx.min_roughness = min_roughness 291 | out = _plugin.pbr_bsdf_fwd(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, False) 292 | return out 293 | 294 | @staticmethod 295 | def backward(ctx, dout): 296 | kd, arm, pos, nrm, view_pos, light_pos = ctx.saved_variables 297 | return _plugin.pbr_bsdf_bwd(kd, arm, pos, nrm, view_pos, light_pos, ctx.min_roughness, dout) + (None, None) 298 | 299 | def pbr_bsdf(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08, use_python=False): 300 | '''Physically-based bsdf, both diffuse & specular lobes 301 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 302 | 303 | Args: 304 | kd: Diffuse albedo. 305 | arm: Specular parameters (attenuation, linear roughness, metalness). 306 | pos: World space position. 307 | nrm: World space shading normal. 308 | view_pos: Camera position in world space, typically using broadcasting. 309 | light_pos: Light position in world space, typically using broadcasting. 
310 | min_roughness: Scalar roughness clamping threshold 311 | 312 | use_python: Use PyTorch implementation (for validation) 313 | 314 | Returns: 315 | Shaded color. 316 | ''' 317 | 318 | if use_python: 319 | out = bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=min_roughness) 320 | else: 321 | out = _pbr_bsdf_func.apply(kd, arm, pos, nrm, view_pos, light_pos, min_roughness) 322 | 323 | if torch.is_anomaly_enabled(): 324 | assert torch.all(torch.isfinite(out)), "Output of pbr_bsdf contains inf or NaN" 325 | return out 326 | 327 | #---------------------------------------------------------------------------- 328 | # Fast image loss function 329 | 330 | class _image_loss_func(torch.autograd.Function): 331 | @staticmethod 332 | def forward(ctx, img, target, loss, tonemapper): 333 | ctx.loss, ctx.tonemapper = loss, tonemapper 334 | ctx.save_for_backward(img, target) 335 | out = _plugin.image_loss_fwd(img, target, loss, tonemapper, False) 336 | return out 337 | 338 | @staticmethod 339 | def backward(ctx, dout): 340 | img, target = ctx.saved_variables 341 | return _plugin.image_loss_bwd(img, target, dout, ctx.loss, ctx.tonemapper) + (None, None, None) 342 | 343 | def image_loss(img, target, loss='l1', tonemapper='none', use_python=False): 344 | '''Compute HDR image loss. Combines tonemapping and loss into a single kernel for better perf. 345 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 346 | 347 | Args: 348 | img: Input image. 349 | target: Target (reference) image. 350 | loss: Type of loss. Valid options are ['l1', 'mse', 'smape', 'relmse'] 351 | tonemapper: Tonemapping operations. Valid options are ['none', 'log_srgb'] 352 | use_python: Use PyTorch implementation (for validation) 353 | 354 | Returns: 355 | Image space loss (scalar value). 356 | ''' 357 | if use_python: 358 | out = image_loss_fn(img, target, loss, tonemapper) 359 | else: 360 | out = _image_loss_func.apply(img, target, loss, tonemapper) 361 | out = torch.sum(out) / (img.shape[0]*img.shape[1]*img.shape[2]) 362 | 363 | if torch.is_anomaly_enabled(): 364 | assert torch.all(torch.isfinite(out)), "Output of image_loss contains inf or NaN" 365 | return out 366 | 367 | #---------------------------------------------------------------------------- 368 | # Transform points function 369 | 370 | class _xfm_func(torch.autograd.Function): 371 | @staticmethod 372 | def forward(ctx, points, matrix, isPoints): 373 | ctx.save_for_backward(points, matrix) 374 | ctx.isPoints = isPoints 375 | out = _plugin.xfm_fwd(points, matrix, isPoints, False) 376 | return out 377 | 378 | @staticmethod 379 | def backward(ctx, dout): 380 | points, matrix = ctx.saved_variables 381 | return (_plugin.xfm_bwd(points, matrix, dout, ctx.isPoints),) + (None, None, None) 382 | 383 | def xfm_points(points, matrix, use_python=False): 384 | '''Transform points. 385 | Note: this method does not back-propagate matrix gradients by default for performance reasons. For matrix gradients, 386 | enable use_python=True or use torch.matmul instead. 387 | 388 | Args: 389 | points: Tensor containing 3D points with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3] 390 | matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4] 391 | use_python: Use PyTorch's torch.matmul (for validation) 392 | Returns: 393 | Transformed points in homogeneous 4D with shape [minibatch_size, num_vertices, 4]. 
394 | ''' 395 | if use_python: 396 | out = torch.matmul(torch.nn.functional.pad(points, pad=(0,1), mode='constant', value=1.0), torch.transpose(matrix, 1, 2)) 397 | else: 398 | out = _xfm_func.apply(points, matrix, True) 399 | 400 | if torch.is_anomaly_enabled(): 401 | assert torch.all(torch.isfinite(out)), "Output of xfm_points contains inf or NaN" 402 | return out 403 | 404 | def xfm_vectors(vectors, matrix, use_python=False): 405 | '''Transform vectors. 406 | Note: this method does not back-propagate matrix gradients by default for performance reasons. For matrix gradients, 407 | enable use_python=True or use torch.matmul instead. 408 | 409 | Args: 410 | vectors: Tensor containing 3D vectors with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3] 411 | matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4] 412 | use_python: Use PyTorch's torch.matmul (for validation) 413 | 414 | Returns: 415 | Transformed vectors with shape [minibatch_size, num_vertices, 3] (the homogeneous w component is dropped). 416 | ''' 417 | 418 | if use_python: 419 | out = torch.matmul(torch.nn.functional.pad(vectors, pad=(0,1), mode='constant', value=0.0), torch.transpose(matrix, 1, 2))[..., 0:3].contiguous() 420 | else: 421 | out = _xfm_func.apply(vectors, matrix, False) 422 | 423 | if torch.is_anomaly_enabled(): 424 | assert torch.all(torch.isfinite(out)), "Output of xfm_vectors contains inf or NaN" 425 | return out 426 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
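Before the unit tests, a minimal usage sketch of the ops module above (an illustration under assumptions, not repo code: it assumes `renderutils` is importable the way these tests arrange it, and note that importing the package already triggers the CUDA plugin build even when use_python=True routes execution through the PyTorch reference path):

import torch
import renderutils as ru  # assumes sys.path is set up as in the tests below

# Diffuse term via the PyTorch reference implementation.
nrm = torch.rand(1, 4, 4, 3, device='cuda')
wi = torch.rand(1, 4, 4, 3, device='cuda')
diffuse = ru.lambert(nrm, wi, use_python=True)  # shape [1, 4, 4, 1]

# HDR image loss with tonemapping folded in.
img = torch.rand(1, 4, 4, 3, device='cuda')
target = torch.rand(1, 4, 4, 3, device='cuda')
loss = ru.image_loss(img, target, loss='smape', tonemapper='log_srgb', use_python=True)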
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 4 18 | DTYPE = torch.float32 19 | 20 | def relative_loss(name, ref, cuda): 21 | ref = ref.float() 22 | cuda = cuda.float() 23 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 24 | 25 | def test_normal(): 26 | pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 27 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 28 | view_pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 29 | view_pos_ref = view_pos_cuda.clone().detach().requires_grad_(True) 30 | perturbed_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 31 | perturbed_nrm_ref = perturbed_nrm_cuda.clone().detach().requires_grad_(True) 32 | smooth_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 33 | smooth_nrm_ref = smooth_nrm_cuda.clone().detach().requires_grad_(True) 34 | smooth_tng_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 35 | smooth_tng_ref = smooth_tng_cuda.clone().detach().requires_grad_(True) 36 | geom_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | geom_nrm_ref = geom_nrm_cuda.clone().detach().requires_grad_(True) 38 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 39 | 40 | ref = ru.prepare_shading_normal(pos_ref, view_pos_ref, perturbed_nrm_ref, smooth_nrm_ref, smooth_tng_ref, geom_nrm_ref, True, use_python=True) 41 | ref_loss = torch.nn.MSELoss()(ref, target) 42 | ref_loss.backward() 43 | 44 | cuda = ru.prepare_shading_normal(pos_cuda, view_pos_cuda, perturbed_nrm_cuda, smooth_nrm_cuda, smooth_tng_cuda, geom_nrm_cuda, True) 45 | cuda_loss = torch.nn.MSELoss()(cuda, target) 46 | cuda_loss.backward() 47 | 48 | print("-------------------------------------------------------------") 49 | print(" bent normal") 50 | print("-------------------------------------------------------------") 51 | relative_loss("res:", ref, cuda) 52 | relative_loss("pos:", pos_ref.grad, pos_cuda.grad) 53 | relative_loss("view_pos:", view_pos_ref.grad, view_pos_cuda.grad) 54 | relative_loss("perturbed_nrm:", perturbed_nrm_ref.grad, perturbed_nrm_cuda.grad) 55 | relative_loss("smooth_nrm:", smooth_nrm_ref.grad, smooth_nrm_cuda.grad) 56 | relative_loss("smooth_tng:", smooth_tng_ref.grad, smooth_tng_cuda.grad) 57 | relative_loss("geom_nrm:", geom_nrm_ref.grad, geom_nrm_cuda.grad) 58 | 59 | def test_schlick(): 60 | f0_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 61 | f0_ref = f0_cuda.clone().detach().requires_grad_(True) 62 | f90_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 63 | f90_ref = f90_cuda.clone().detach().requires_grad_(True) 64 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 2.0 65 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 66 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 67 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 68 | 69 | ref = ru._fresnel_shlick(f0_ref, f90_ref, cosT_ref, use_python=True) 70 | ref_loss = torch.nn.MSELoss()(ref, target) 71 | ref_loss.backward() 72 | 73 | cuda = ru._fresnel_shlick(f0_cuda, f90_cuda, cosT_cuda) 74 | cuda_loss = torch.nn.MSELoss()(cuda, target) 75 | cuda_loss.backward() 76 | 77 | 
print("-------------------------------------------------------------") 78 | print(" Fresnel shlick") 79 | print("-------------------------------------------------------------") 80 | relative_loss("res:", ref, cuda) 81 | relative_loss("f0:", f0_ref.grad, f0_cuda.grad) 82 | relative_loss("f90:", f90_ref.grad, f90_cuda.grad) 83 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 84 | 85 | def test_ndf_ggx(): 86 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 87 | alphaSqr_cuda = alphaSqr_cuda.clone().detach().requires_grad_(True) 88 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 89 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1 90 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 91 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 92 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 93 | 94 | ref = ru._ndf_ggx(alphaSqr_ref, cosT_ref, use_python=True) 95 | ref_loss = torch.nn.MSELoss()(ref, target) 96 | ref_loss.backward() 97 | 98 | cuda = ru._ndf_ggx(alphaSqr_cuda, cosT_cuda) 99 | cuda_loss = torch.nn.MSELoss()(cuda, target) 100 | cuda_loss.backward() 101 | 102 | print("-------------------------------------------------------------") 103 | print(" Ndf GGX") 104 | print("-------------------------------------------------------------") 105 | relative_loss("res:", ref, cuda) 106 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 107 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 108 | 109 | def test_lambda_ggx(): 110 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 111 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 112 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1 113 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 114 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 115 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 116 | 117 | ref = ru._lambda_ggx(alphaSqr_ref, cosT_ref, use_python=True) 118 | ref_loss = torch.nn.MSELoss()(ref, target) 119 | ref_loss.backward() 120 | 121 | cuda = ru._lambda_ggx(alphaSqr_cuda, cosT_cuda) 122 | cuda_loss = torch.nn.MSELoss()(cuda, target) 123 | cuda_loss.backward() 124 | 125 | print("-------------------------------------------------------------") 126 | print(" Lambda GGX") 127 | print("-------------------------------------------------------------") 128 | relative_loss("res:", ref, cuda) 129 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 130 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 131 | 132 | def test_masking_smith(): 133 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 134 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 135 | cosThetaI_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 136 | cosThetaI_ref = cosThetaI_cuda.clone().detach().requires_grad_(True) 137 | cosThetaO_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 138 | cosThetaO_ref = cosThetaO_cuda.clone().detach().requires_grad_(True) 139 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 140 | 141 | ref = ru._masking_smith(alphaSqr_ref, cosThetaI_ref, cosThetaO_ref, use_python=True) 142 | ref_loss = torch.nn.MSELoss()(ref, target) 143 | ref_loss.backward() 144 | 145 | cuda = 
ru._masking_smith(alphaSqr_cuda, cosThetaI_cuda, cosThetaO_cuda) 146 | cuda_loss = torch.nn.MSELoss()(cuda, target) 147 | cuda_loss.backward() 148 | 149 | print("-------------------------------------------------------------") 150 | print(" Smith masking term") 151 | print("-------------------------------------------------------------") 152 | relative_loss("res:", ref, cuda) 153 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 154 | relative_loss("cosThetaI:", cosThetaI_ref.grad, cosThetaI_cuda.grad) 155 | relative_loss("cosThetaO:", cosThetaO_ref.grad, cosThetaO_cuda.grad) 156 | 157 | def test_lambert(): 158 | normals_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 159 | normals_ref = normals_cuda.clone().detach().requires_grad_(True) 160 | wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 161 | wi_ref = wi_cuda.clone().detach().requires_grad_(True) 162 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 163 | 164 | ref = ru.lambert(normals_ref, wi_ref, use_python=True) 165 | ref_loss = torch.nn.MSELoss()(ref, target) 166 | ref_loss.backward() 167 | 168 | cuda = ru.lambert(normals_cuda, wi_cuda) 169 | cuda_loss = torch.nn.MSELoss()(cuda, target) 170 | cuda_loss.backward() 171 | 172 | print("-------------------------------------------------------------") 173 | print(" Lambert") 174 | print("-------------------------------------------------------------") 175 | relative_loss("res:", ref, cuda) 176 | relative_loss("nrm:", normals_ref.grad, normals_cuda.grad) 177 | relative_loss("wi:", wi_ref.grad, wi_cuda.grad) 178 | 179 | def test_pbr_specular(): 180 | col_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 181 | col_ref = col_cuda.clone().detach().requires_grad_(True) 182 | nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 183 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 184 | wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 185 | wi_ref = wi_cuda.clone().detach().requires_grad_(True) 186 | wo_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 187 | wo_ref = wo_cuda.clone().detach().requires_grad_(True) 188 | alpha_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 189 | alpha_ref = alpha_cuda.clone().detach().requires_grad_(True) 190 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 191 | 192 | ref = ru.pbr_specular(col_ref, nrm_ref, wo_ref, wi_ref, alpha_ref, use_python=True) 193 | ref_loss = torch.nn.MSELoss()(ref, target) 194 | ref_loss.backward() 195 | 196 | cuda = ru.pbr_specular(col_cuda, nrm_cuda, wo_cuda, wi_cuda, alpha_cuda) 197 | cuda_loss = torch.nn.MSELoss()(cuda, target) 198 | cuda_loss.backward() 199 | 200 | print("-------------------------------------------------------------") 201 | print(" Pbr specular") 202 | print("-------------------------------------------------------------") 203 | 204 | relative_loss("res:", ref, cuda) 205 | if col_ref.grad is not None: 206 | relative_loss("col:", col_ref.grad, col_cuda.grad) 207 | if nrm_ref.grad is not None: 208 | relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad) 209 | if wi_ref.grad is not None: 210 | relative_loss("wi:", wi_ref.grad, wi_cuda.grad) 211 | if wo_ref.grad is not None: 212 | relative_loss("wo:", wo_ref.grad, wo_cuda.grad) 213 | if alpha_ref.grad is not None: 214 | relative_loss("alpha:", alpha_ref.grad, alpha_cuda.grad) 215 | 216 | 
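Every test in this file repeats the same `clone().detach().requires_grad_(True)` idiom; a short sketch of what it buys (hypothetical helper, not part of the repo):

import torch

def leaf_copy(t: torch.Tensor) -> torch.Tensor:
    # Independent leaf tensor with the same values, so the reference and
    # CUDA paths accumulate gradients separately from identical inputs.
    return t.clone().detach().requires_grad_(True)

a = torch.rand(2, 2, requires_grad=True)
b = leaf_copy(a)
(b * 2).sum().backward()
assert b.grad is not None and a.grad is None  # the two graphs are decoupled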
def test_pbr_bsdf(): 217 | kd_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 218 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 219 | arm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 220 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 221 | pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 222 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 223 | nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 224 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 225 | view_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 226 | view_ref = view_cuda.clone().detach().requires_grad_(True) 227 | light_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 228 | light_ref = light_cuda.clone().detach().requires_grad_(True) 229 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 230 | 231 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 232 | ref_loss = torch.nn.MSELoss()(ref, target) 233 | ref_loss.backward() 234 | 235 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 236 | cuda_loss = torch.nn.MSELoss()(cuda, target) 237 | cuda_loss.backward() 238 | 239 | print("-------------------------------------------------------------") 240 | print(" Pbr BSDF") 241 | print("-------------------------------------------------------------") 242 | 243 | relative_loss("res:", ref, cuda) 244 | if kd_ref.grad is not None: 245 | relative_loss("kd:", kd_ref.grad, kd_cuda.grad) 246 | if arm_ref.grad is not None: 247 | relative_loss("arm:", arm_ref.grad, arm_cuda.grad) 248 | if pos_ref.grad is not None: 249 | relative_loss("pos:", pos_ref.grad, pos_cuda.grad) 250 | if nrm_ref.grad is not None: 251 | relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad) 252 | if view_ref.grad is not None: 253 | relative_loss("view:", view_ref.grad, view_cuda.grad) 254 | if light_ref.grad is not None: 255 | relative_loss("light:", light_ref.grad, light_cuda.grad) 256 | 257 | test_normal() 258 | 259 | test_schlick() 260 | test_ndf_ggx() 261 | test_lambda_ggx() 262 | test_masking_smith() 263 | 264 | test_lambert() 265 | test_pbr_specular() 266 | test_pbr_bsdf() 267 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 8 18 | DTYPE = torch.float32 19 | 20 | def tonemap_srgb(f): 21 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 22 | 23 | def l1(output, target): 24 | x = torch.clamp(output, min=0, max=65535) 25 | r = torch.clamp(target, min=0, max=65535) 26 | x = tonemap_srgb(torch.log(x + 1)) 27 | r = tonemap_srgb(torch.log(r + 1)) 28 | return torch.nn.functional.l1_loss(x,r) 29 | 30 | def relative_loss(name, ref, cuda): 31 | ref = ref.float() 32 | cuda = cuda.float() 33 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 34 | 35 | def test_loss(loss, tonemapper): 36 | img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | img_ref = img_cuda.clone().detach().requires_grad_(True) 38 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 39 | target_ref = target_cuda.clone().detach().requires_grad_(True) 40 | 41 | ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True) 42 | ref_loss.backward() 43 | 44 | cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper) 45 | cuda_loss.backward() 46 | 47 | print("-------------------------------------------------------------") 48 | print(" Loss: %s, %s" % (loss, tonemapper)) 49 | print("-------------------------------------------------------------") 50 | 51 | relative_loss("res:", ref_loss, cuda_loss) 52 | relative_loss("img:", img_ref.grad, img_cuda.grad) 53 | relative_loss("target:", target_ref.grad, target_cuda.grad) 54 | 55 | 56 | test_loss('l1', 'none') 57 | test_loss('l1', 'log_srgb') 58 | test_loss('mse', 'log_srgb') 59 | test_loss('smape', 'none') 60 | test_loss('relmse', 'none') 61 | test_loss('mse', 'none') -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_mesh.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
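test_mesh.py below validates xfm_points and xfm_vectors; their reference semantics, restated as a standalone sketch mirroring the use_python branch in ops.py above:

import torch

points = torch.rand(1, 5, 3)           # [minibatch, num_vertices, 3]
matrix = torch.eye(4).expand(2, 4, 4)  # [minibatch, 4, 4]

# Points pad w=1 so the translation column applies; vectors pad w=0 instead.
hom = torch.nn.functional.pad(points, pad=(0, 1), mode='constant', value=1.0)
out = torch.matmul(hom, matrix.transpose(1, 2))  # batch dim broadcasts 1 -> 2
assert out.shape == (2, 5, 4)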
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | BATCH = 8 18 | RES = 1024 19 | DTYPE = torch.float32 20 | 21 | torch.manual_seed(0) 22 | 23 | def tonemap_srgb(f): 24 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 25 | 26 | def l1(output, target): 27 | x = torch.clamp(output, min=0, max=65535) 28 | r = torch.clamp(target, min=0, max=65535) 29 | x = tonemap_srgb(torch.log(x + 1)) 30 | r = tonemap_srgb(torch.log(r + 1)) 31 | return torch.nn.functional.l1_loss(x,r) 32 | 33 | def relative_loss(name, ref, cuda): 34 | ref = ref.float() 35 | cuda = cuda.float() 36 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref)).item()) 37 | 38 | def test_xfm_points(): 39 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 40 | points_ref = points_cuda.clone().detach().requires_grad_(True) 41 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 42 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 43 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 44 | 45 | ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True) 46 | ref_loss = torch.nn.MSELoss()(ref_out, target) 47 | ref_loss.backward() 48 | 49 | cuda_out = ru.xfm_points(points_cuda, mtx_cuda) 50 | cuda_loss = torch.nn.MSELoss()(cuda_out, target) 51 | cuda_loss.backward() 52 | 53 | print("-------------------------------------------------------------") 54 | 55 | relative_loss("res:", ref_out, cuda_out) 56 | relative_loss("points:", points_ref.grad, points_cuda.grad) 57 | 58 | def test_xfm_vectors(): 59 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 60 | points_ref = points_cuda.clone().detach().requires_grad_(True) 61 | points_cuda_p = points_cuda.clone().detach().requires_grad_(True) 62 | points_ref_p = points_cuda.clone().detach().requires_grad_(True) 63 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 64 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 65 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 66 | 67 | ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True) 68 | ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3]) 69 | ref_loss.backward() 70 | 71 | cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda) 72 | cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3]) 73 | cuda_loss.backward() 74 | 75 | ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True) 76 | ref_loss_p = torch.nn.MSELoss()(ref_out_p, target) 77 | ref_loss_p.backward() 78 | 79 | cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda) 80 | cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target) 81 | cuda_loss_p.backward() 82 | 83 | print("-------------------------------------------------------------") 84 | 85 | relative_loss("res:", ref_out, cuda_out) 86 | relative_loss("points:", points_ref.grad, points_cuda.grad) 87 | relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad) 88 | 89 | test_xfm_points() 90 | test_xfm_vectors() 91 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_perf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, 
NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | import time 15 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 16 | import renderutils as ru 17 | 18 | DTYPE=torch.float32 19 | 20 | def test_bsdf(BATCH, RES, ITR): 21 | kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 22 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 23 | arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 24 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 25 | pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 26 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 27 | nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 28 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 29 | view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 30 | view_ref = view_cuda.clone().detach().requires_grad_(True) 31 | light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 32 | light_ref = light_cuda.clone().detach().requires_grad_(True) 33 | target = torch.rand(BATCH, RES, RES, 3, device='cuda') 34 | 35 | start = torch.cuda.Event(enable_timing=True) 36 | end = torch.cuda.Event(enable_timing=True) 37 | 38 | ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 39 | 40 | print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES)) 41 | 42 | start.record() 43 | for i in range(ITR): 44 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 45 | end.record() 46 | torch.cuda.synchronize() 47 | print("Pbr BSDF python:", start.elapsed_time(end)) 48 | 49 | start.record() 50 | for i in range(ITR): 51 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 52 | end.record() 53 | torch.cuda.synchronize() 54 | print("Pbr BSDF cuda:", start.elapsed_time(end)) 55 | 56 | test_bsdf(1, 512, 1000) 57 | test_bsdf(16, 512, 1000) 58 | test_bsdf(1, 2048, 1000) 59 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/texture.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | import nvdiffrast.torch as dr 13 | 14 | from . import util 15 | 16 | ######################################################################################################## 17 | # Simple texture class. 
A texture can be either 18 | # - A 3D tensor (using auto mipmaps) 19 | # - A list of 3D tensors (full custom mip hierarchy) 20 | ######################################################################################################## 21 | 22 | class Texture2D: 23 | # Initializes a texture from image data. 24 | # Input can be constant value (1D array) or texture (3D array) or mip hierarchy (list of 3d arrays) 25 | def __init__(self, init): 26 | if isinstance(init, np.ndarray): 27 | init = torch.tensor(init, dtype=torch.float32, device='cuda') 28 | elif isinstance(init, list) and len(init) == 1: 29 | init = init[0] 30 | 31 | if isinstance(init, list) or len(init.shape) == 4: 32 | self.data = init 33 | elif len(init.shape) == 3: 34 | self.data = init[None, ...] 35 | else: 36 | self.data = init[None, None, None, :] # Convert constant to 1x1 tensor 37 | 38 | # Filtered (trilinear) sample texture at a given location 39 | def sample(self, texc, texc_deriv, filter_mode='linear-mipmap-linear', data_fmt=torch.float32): 40 | if isinstance(self.data, list): 41 | out = dr.texture(self.data[0], texc, texc_deriv, mip=self.data[1:], filter_mode=filter_mode) 42 | else: 43 | out = dr.texture(self.data, texc, texc_deriv, filter_mode=filter_mode) 44 | return out.to(data_fmt) 45 | 46 | def getRes(self): 47 | return self.getMips()[0].shape[1:3] 48 | 49 | def getMips(self): 50 | if isinstance(self.data, list): 51 | return self.data 52 | else: 53 | return [self.data] 54 | 55 | # In-place clamp with no derivative to make sure values are in valid range after training 56 | def clamp_(self, min=None, max=None): 57 | with torch.no_grad(): 58 | for mip in self.getMips(): 59 | mip.clamp_(min=min, max=max) 60 | 61 | # In-place clamp with no derivative to make sure values are in valid range after training 62 | def clamp_rgb_(self, minR=None, maxR=None, minG=None, maxG=None, minB=None, maxB=None): 63 | with torch.no_grad(): 64 | for mip in self.getMips(): 65 | mip[...,0].clamp_(min=minR, max=maxR) 66 | mip[...,1].clamp_(min=minG, max=maxG) 67 | mip[...,2].clamp_(min=minB, max=maxB) 68 | 69 | ######################################################################################################## 70 | # Helper function to create a trainable texture from a regular texture. The trainable weights are 71 | # initialized with texture data as an initial guess 72 | ######################################################################################################## 73 | 74 | def create_trainable(init, res, auto_mipmaps): 75 | with torch.no_grad(): 76 | if isinstance(init, Texture2D): 77 | assert isinstance(init.data, torch.Tensor) 78 | init = init.data 79 | elif isinstance(init, np.ndarray): 80 | init = torch.tensor(init, dtype=torch.float32, device='cuda') 81 | 82 | # Pad to NHWC if needed 83 | if len(init.shape) == 1: # Extend constant to NHWC tensor 84 | init = init[None, None, None, :] 85 | elif len(init.shape) == 3: 86 | init = init[None, ...] 87 | 88 | # Scale input to desired resolution. 
89 | init = util.scale_img_nhwc(init, res) 90 | 91 | # Generate custom mip chain 92 | if not auto_mipmaps: 93 | mip_chain = [init.clone().detach().requires_grad_(True)] 94 | while mip_chain[-1].shape[1] > 1 or mip_chain[-1].shape[2] > 1: 95 | new_size = [max(mip_chain[-1].shape[1] // 2, 1), max(mip_chain[-1].shape[2] // 2, 1)] 96 | init = util.scale_img_nhwc(mip_chain[-1], new_size) 97 | mip_chain += [init.clone().detach().requires_grad_(True)] 98 | return Texture2D(mip_chain) 99 | else: 100 | return Texture2D(init.clone().detach().requires_grad_(True)) 101 | 102 | ######################################################################################################## 103 | # Convert texture to and from SRGB 104 | ######################################################################################################## 105 | 106 | def srgb_to_rgb(texture): 107 | return Texture2D(list(util.srgb_to_rgb(mip) for mip in texture.getMips())) 108 | 109 | def rgb_to_srgb(texture): 110 | return Texture2D(list(util.rgb_to_srgb(mip) for mip in texture.getMips())) 111 | 112 | ######################################################################################################## 113 | # Utility functions for loading / storing a texture 114 | ######################################################################################################## 115 | 116 | def _load_mip2D(fn, lambda_fn=None, channels=None): 117 | imgdata = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda') 118 | if channels is not None: 119 | imgdata = imgdata[..., 0:channels] 120 | if lambda_fn is not None: 121 | imgdata = lambda_fn(imgdata) 122 | return imgdata.detach().clone() 123 | 124 | def load_texture2D(fn, lambda_fn=None, channels=None): 125 | base, ext = os.path.splitext(fn) 126 | if os.path.exists(base + "_0" + ext): 127 | mips = [] 128 | while os.path.exists(base + ("_%d" % len(mips)) + ext): 129 | mips += [_load_mip2D(base + ("_%d" % len(mips)) + ext, lambda_fn, channels)] 130 | return Texture2D(mips) 131 | else: 132 | return Texture2D(_load_mip2D(fn, lambda_fn, channels)) 133 | 134 | def _save_mip2D(fn, mip, mipidx, lambda_fn): 135 | if lambda_fn is not None: 136 | data = lambda_fn(mip).detach().cpu().numpy() 137 | else: 138 | data = mip.detach().cpu().numpy() 139 | 140 | if mipidx is None: 141 | util.save_image(fn, data) 142 | else: 143 | base, ext = os.path.splitext(fn) 144 | util.save_image(base + ("_%d" % mipidx) + ext, data) 145 | 146 | def save_texture2D(fn, tex, lambda_fn=None): 147 | if isinstance(tex.data, list): 148 | for i, mip in enumerate(tex.data): 149 | _save_mip2D(fn, mip[0,...], i, lambda_fn) 150 | else: 151 | _save_mip2D(fn, tex.data[0,...], None, lambda_fn) 152 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
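A note ahead of util.py: the length() helper below clamps dot(x, x) before the square root because the gradient of sqrt at 0 is NaN, as its inline comment says. A two-line sketch of the failure mode it guards against:

import torch

x = torch.zeros(3, requires_grad=True)
torch.sqrt((x * x).sum()).backward()  # d/du sqrt(u) = 0.5/sqrt(u) -> inf at u=0, times du/dx = 0 -> NaN
print(x.grad)                         # tensor([nan, nan, nan])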
8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import torch 13 | import nvdiffrast.torch as dr 14 | import imageio 15 | 16 | #---------------------------------------------------------------------------- 17 | # Vector operations 18 | #---------------------------------------------------------------------------- 19 | 20 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 21 | return torch.sum(x*y, -1, keepdim=True) 22 | 23 | def reflect(x: torch.Tensor, n: torch.Tensor) -> torch.Tensor: 24 | return 2*dot(x, n)*n - x 25 | 26 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 27 | return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN 28 | 29 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 30 | return x / length(x, eps) 31 | 32 | def to_hvec(x: torch.Tensor, w: float) -> torch.Tensor: 33 | return torch.nn.functional.pad(x, pad=(0,1), mode='constant', value=w) 34 | 35 | #---------------------------------------------------------------------------- 36 | # Tonemapping 37 | #---------------------------------------------------------------------------- 38 | 39 | def tonemap_srgb(f: torch.Tensor) -> torch.Tensor: 40 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 41 | 42 | #---------------------------------------------------------------------------- 43 | # sRGB color transforms 44 | #---------------------------------------------------------------------------- 45 | 46 | def _rgb_to_srgb(f: torch.Tensor) -> torch.Tensor: 47 | return torch.where(f <= 0.0031308, f * 12.92, torch.pow(torch.clamp(f, 0.0031308), 1.0/2.4)*1.055 - 0.055) 48 | 49 | def rgb_to_srgb(f: torch.Tensor) -> torch.Tensor: 50 | assert f.shape[-1] == 3 or f.shape[-1] == 4 51 | out = torch.cat((_rgb_to_srgb(f[..., 0:3]), f[..., 3:4]), dim=-1) if f.shape[-1] == 4 else _rgb_to_srgb(f) 52 | assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2] 53 | return out 54 | 55 | def _srgb_to_rgb(f: torch.Tensor) -> torch.Tensor: 56 | return torch.where(f <= 0.04045, f / 12.92, torch.pow((torch.clamp(f, 0.04045) + 0.055) / 1.055, 2.4)) 57 | 58 | def srgb_to_rgb(f: torch.Tensor) -> torch.Tensor: 59 | assert f.shape[-1] == 3 or f.shape[-1] == 4 60 | out = torch.cat((_srgb_to_rgb(f[..., 0:3]), f[..., 3:4]), dim=-1) if f.shape[-1] == 4 else _srgb_to_rgb(f) 61 | assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2] 62 | return out 63 | 64 | #---------------------------------------------------------------------------- 65 | # Displacement texture lookup 66 | #---------------------------------------------------------------------------- 67 | 68 | def get_miplevels(texture: np.ndarray) -> float: 69 | minDim = min(texture.shape[0], texture.shape[1]) 70 | return np.floor(np.log2(minDim)) 71 | 72 | # TODO: Handle wrapping maybe 73 | def tex_2d(tex_map : torch.Tensor, coords : torch.Tensor, filter='nearest') -> torch.Tensor: 74 | tex_map = tex_map[None, ...] # Add batch dimension 75 | tex_map = tex_map.permute(0, 3, 1, 2) # NHWC -> NCHW 76 | tex = torch.nn.functional.grid_sample(tex_map, coords[None, None, ...] * 2 - 1, mode=filter, align_corners=False) 77 | tex = tex.permute(0, 2, 3, 1) # NCHW -> NHWC 78 | return tex[0, 0, ...] 
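The sRGB transforms above are exact inverses on [0, 1] (the 0.04045 decode threshold is 12.92 × 0.0031308); a standalone round-trip check, restating the same formulas so the snippet runs on its own:

import torch

def rgb_to_srgb(f):
    # Linear -> sRGB: linear segment near zero, gamma curve elsewhere.
    return torch.where(f <= 0.0031308, f * 12.92,
                       torch.pow(torch.clamp(f, 0.0031308), 1.0 / 2.4) * 1.055 - 0.055)

def srgb_to_rgb(f):
    # sRGB -> linear: inverse of the above, branch for branch.
    return torch.where(f <= 0.04045, f / 12.92,
                       torch.pow((torch.clamp(f, 0.04045) + 0.055) / 1.055, 2.4))

x = torch.rand(4, 4, 3)
assert torch.allclose(srgb_to_rgb(rgb_to_srgb(x)), x, atol=1e-5)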
79 | 
80 | #----------------------------------------------------------------------------
81 | # Image scaling
82 | #----------------------------------------------------------------------------
83 | 
84 | def scale_img_hwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
85 |     return scale_img_nhwc(x[None, ...], size, mag, min)[0]
86 | 
87 | def scale_img_nhwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
88 |     assert (x.shape[1] >= size[0] and x.shape[2] >= size[1]) or (x.shape[1] < size[0] and x.shape[2] < size[1]), "Trying to magnify image in one dimension and minify in the other"
89 |     y = x.permute(0, 3, 1, 2) # NHWC -> NCHW
90 |     if x.shape[1] > size[0] and x.shape[2] > size[1]: # Minification, previous size was bigger
91 |         y = torch.nn.functional.interpolate(y, size, mode=min)
92 |     else: # Magnification
93 |         if mag == 'bilinear' or mag == 'bicubic':
94 |             y = torch.nn.functional.interpolate(y, size, mode=mag, align_corners=True)
95 |         else:
96 |             y = torch.nn.functional.interpolate(y, size, mode=mag)
97 |     return y.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC
98 | 
99 | def avg_pool_nhwc(x : torch.Tensor, size) -> torch.Tensor:
100 |     y = x.permute(0, 3, 1, 2) # NHWC -> NCHW
101 |     y = torch.nn.functional.avg_pool2d(y, size)
102 |     return y.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC
103 | 
104 | #----------------------------------------------------------------------------
105 | # Behaves similarly to tf.segment_sum
106 | #----------------------------------------------------------------------------
107 | 
108 | def segment_sum(data: torch.Tensor, segment_ids: torch.Tensor) -> torch.Tensor:
109 |     num_segments = torch.unique_consecutive(segment_ids).shape[0]
110 | 
111 |     # Repeats ids until same dimension as data
112 |     if len(segment_ids.shape) == 1:
113 |         s = torch.prod(torch.tensor(data.shape[1:], dtype=torch.int64, device='cuda')).long()
114 |         segment_ids = segment_ids.repeat_interleave(s).view(segment_ids.shape[0], *data.shape[1:])
115 | 
116 |     assert data.shape == segment_ids.shape, "data.shape and segment_ids.shape should be equal"
117 | 
118 |     shape = [num_segments] + list(data.shape[1:])
119 |     result = torch.zeros(*shape, dtype=torch.float32, device='cuda')
120 |     result = result.scatter_add(0, segment_ids, data)
121 |     return result
122 | 
123 | #----------------------------------------------------------------------------
124 | # Projection and transformation matrix helpers.
125 | #---------------------------------------------------------------------------- 126 | 127 | def projection(x=0.1, n=1.0, f=50.0): 128 | return np.array([[n/x, 0, 0, 0], 129 | [ 0, n/-x, 0, 0], 130 | [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 131 | [ 0, 0, -1, 0]]).astype(np.float32) 132 | 133 | def translate(x, y, z): 134 | return np.array([[1, 0, 0, x], 135 | [0, 1, 0, y], 136 | [0, 0, 1, z], 137 | [0, 0, 0, 1]]).astype(np.float32) 138 | 139 | def rotate_x(a): 140 | s, c = np.sin(a), np.cos(a) 141 | return np.array([[1, 0, 0, 0], 142 | [0, c, s, 0], 143 | [0, -s, c, 0], 144 | [0, 0, 0, 1]]).astype(np.float32) 145 | 146 | def rotate_y(a): 147 | s, c = np.sin(a), np.cos(a) 148 | return np.array([[ c, 0, s, 0], 149 | [ 0, 1, 0, 0], 150 | [-s, 0, c, 0], 151 | [ 0, 0, 0, 1]]).astype(np.float32) 152 | 153 | def scale(s): 154 | return np.array([[ s, 0, 0, 0], 155 | [ 0, s, 0, 0], 156 | [ 0, 0, s, 0], 157 | [ 0, 0, 0, 1]]).astype(np.float32) 158 | 159 | def lookAt(eye, at, up): 160 | a = eye - at 161 | b = up 162 | w = a / np.linalg.norm(a) 163 | u = np.cross(b, w) 164 | u = u / np.linalg.norm(u) 165 | v = np.cross(w, u) 166 | translate = np.array([[1, 0, 0, -eye[0]], 167 | [0, 1, 0, -eye[1]], 168 | [0, 0, 1, -eye[2]], 169 | [0, 0, 0, 1]]).astype(np.float32) 170 | rotate = np.array([[u[0], u[1], u[2], 0], 171 | [v[0], v[1], v[2], 0], 172 | [w[0], w[1], w[2], 0], 173 | [0, 0, 0, 1]]).astype(np.float32) 174 | return np.matmul(rotate, translate) 175 | 176 | def random_rotation_translation(t): 177 | m = np.random.normal(size=[3, 3]) 178 | m[1] = np.cross(m[0], m[2]) 179 | m[2] = np.cross(m[0], m[1]) 180 | m = m / np.linalg.norm(m, axis=1, keepdims=True) 181 | m = np.pad(m, [[0, 1], [0, 1]], mode='constant') 182 | m[3, 3] = 1.0 183 | m[:3, 3] = np.random.uniform(-t, t, size=[3]) 184 | return m 185 | 186 | 187 | #---------------------------------------------------------------------------- 188 | # Cosine sample around a vector N 189 | #---------------------------------------------------------------------------- 190 | def cosine_sample(N : np.ndarray) -> np.ndarray: 191 | # construct local frame 192 | N = N/np.linalg.norm(N) 193 | 194 | dx0 = np.array([0, N[2], -N[1]]) 195 | dx1 = np.array([-N[2], 0, N[0]]) 196 | 197 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 198 | dx = dx/np.linalg.norm(dx) 199 | dy = np.cross(N,dx) 200 | dy = dy/np.linalg.norm(dy) 201 | 202 | # cosine sampling in local frame 203 | phi = 2.0*np.pi*np.random.uniform() 204 | s = np.random.uniform() 205 | costheta = np.sqrt(s) 206 | sintheta = np.sqrt(1.0 - s) 207 | 208 | # cartesian vector in local space 209 | x = np.cos(phi)*sintheta 210 | y = np.sin(phi)*sintheta 211 | z = costheta 212 | 213 | # local to world 214 | return dx*x + dy*y + N*z 215 | 216 | 217 | #---------------------------------------------------------------------------- 218 | # Cosine sampled light directions around the vector N 219 | #---------------------------------------------------------------------------- 220 | def cosine_sample_texture(res, N : np.ndarray) -> torch.Tensor: 221 | # construct local frame 222 | N = N/np.linalg.norm(N) 223 | 224 | dx0 = np.array([0, N[2], -N[1]]) 225 | dx1 = np.array([-N[2], 0, N[0]]) 226 | 227 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 228 | dx = dx/np.linalg.norm(dx) 229 | dy = np.cross(N,dx) 230 | dy = dy/np.linalg.norm(dy) 231 | 232 | X = torch.tensor(dx, dtype=torch.float32, device='cuda') 233 | Y = torch.tensor(dy, dtype=torch.float32, device='cuda') 234 | Z = torch.tensor(N, dtype=torch.float32, 
device='cuda') 235 | 236 | # cosine sampling in local frame 237 | 238 | phi = 2.0*np.pi*torch.rand(res, res, 1, dtype=torch.float32, device='cuda') 239 | s = torch.rand(res, res, 1, dtype=torch.float32, device='cuda') 240 | costheta = torch.sqrt(s) 241 | sintheta = torch.sqrt(1.0 - s) 242 | 243 | # cartesian vector in local space 244 | x = torch.cos(phi)*sintheta 245 | y = torch.sin(phi)*sintheta 246 | z = costheta 247 | 248 | # local to world 249 | return X*x + Y*y + Z*z 250 | 251 | #---------------------------------------------------------------------------- 252 | # Bilinear downsample by 2x. 253 | #---------------------------------------------------------------------------- 254 | 255 | def bilinear_downsample(x : torch.tensor) -> torch.Tensor: 256 | w = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0 257 | w = w.expand(x.shape[-1], 1, 4, 4) 258 | x = torch.nn.functional.conv2d(x.permute(0, 3, 1, 2), w, padding=1, stride=2, groups=x.shape[-1]) 259 | return x.permute(0, 2, 3, 1) 260 | 261 | #---------------------------------------------------------------------------- 262 | # Bilinear downsample log(spp) steps 263 | #---------------------------------------------------------------------------- 264 | 265 | def bilinear_downsample(x : torch.tensor, spp) -> torch.Tensor: 266 | w = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0 267 | g = x.shape[-1] 268 | w = w.expand(g, 1, 4, 4) 269 | x = x.permute(0, 3, 1, 2) # NHWC -> NCHW 270 | steps = int(np.log2(spp)) 271 | for _ in range(steps): 272 | xp = torch.nn.functional.pad(x, (1,1,1,1), mode='replicate') 273 | x = torch.nn.functional.conv2d(xp, w, padding=0, stride=2, groups=g) 274 | return x.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC 275 | 276 | 277 | #---------------------------------------------------------------------------- 278 | # Image display function using OpenGL. 279 | #---------------------------------------------------------------------------- 280 | 281 | _glfw_window = None 282 | def display_image(image, zoom=None, size=None, title=None): # HWC 283 | # Import OpenGL and glfw. 284 | import OpenGL.GL as gl 285 | import glfw 286 | 287 | # Zoom image if requested. 288 | image = np.asarray(image) 289 | if size is not None: 290 | assert zoom is None 291 | zoom = max(1, size // image.shape[0]) 292 | if zoom is not None: 293 | image = image.repeat(zoom, axis=0).repeat(zoom, axis=1) 294 | height, width, channels = image.shape 295 | 296 | # Initialize window. 297 | if title is None: 298 | title = 'Debug window' 299 | global _glfw_window 300 | if _glfw_window is None: 301 | glfw.init() 302 | _glfw_window = glfw.create_window(width, height, title, None, None) 303 | glfw.make_context_current(_glfw_window) 304 | glfw.show_window(_glfw_window) 305 | glfw.swap_interval(0) 306 | else: 307 | glfw.make_context_current(_glfw_window) 308 | glfw.set_window_title(_glfw_window, title) 309 | glfw.set_window_size(_glfw_window, width, height) 310 | 311 | # Update window. 
312 |     glfw.poll_events()
313 |     gl.glClearColor(0, 0, 0, 1)
314 |     gl.glClear(gl.GL_COLOR_BUFFER_BIT)
315 |     gl.glWindowPos2f(0, 0)
316 |     gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
317 |     gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
318 |     gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
319 |     gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])
320 |     glfw.swap_buffers(_glfw_window)
321 |     if glfw.window_should_close(_glfw_window):
322 |         return False
323 |     return True
324 | 
325 | #----------------------------------------------------------------------------
326 | # Image save helper.
327 | #----------------------------------------------------------------------------
328 | 
329 | def save_image(fn, x : np.ndarray) -> None:
330 |     imageio.imwrite(fn, np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8))
331 | 
332 | def load_image(fn) -> np.ndarray:
333 |     img = imageio.imread(fn)
334 |     if img.dtype == np.float32: # HDR image
335 |         return img
336 |     else: # LDR image
337 |         return img.astype(np.float32) / 255
338 | 
339 | #----------------------------------------------------------------------------
340 | 
341 | def time_to_text(x):
342 |     if x > 3600:
343 |         return "%.2f h" % (x / 3600)
344 |     elif x > 60:
345 |         return "%.2f m" % (x / 60)
346 |     else:
347 |         return "%.2f s" % x
348 | 
349 | #----------------------------------------------------------------------------
350 | 
351 | def checkerboard(width, repetitions) -> np.ndarray:
352 |     tilesize = int(width//repetitions//2)
353 |     check = np.kron([[1, 0] * repetitions, [0, 1] * repetitions] * repetitions, np.ones((tilesize, tilesize)))*0.33 + 0.33
354 |     return np.stack((check, check, check), axis=-1)[None, ...]
355 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | clip @ git+https://github.com/openai/CLIP.git@b46f5ac7587d2e1862f8b7b1573179d80dcdd620
2 | imageio
3 | cython
4 | imageio-ffmpeg
5 | kornia
6 | numpy
7 | nvdiffrast @ git+https://github.com/NVlabs/nvdiffrast.git@78528e683210dfaa1be57e3c65aa37d3b36c6644
8 | Pillow
9 | PyGLM
10 | resize-right
11 | scipy
12 | smplx
13 | tqdm
14 | Ninja
15 | pyyaml
16 | matplotlib>=3.3.0
17 | trimesh
18 | tensorboard
19 | easydict
20 | cholespy
21 | fire
22 | torch-scatter
23 | torch-sparse
24 | -f https://data.pyg.org/whl/torch-1.13.1+cu117.html
25 | pymeshlab
--------------------------------------------------------------------------------
/utilities/camera.py:
--------------------------------------------------------------------------------
1 | import glm
2 | import torch
3 | import random
4 | 
5 | import numpy as np
6 | import torchvision.transforms as transforms
7 | 
8 | from .resize_right import resize
9 | 
10 | blurs = [
11 |     transforms.Compose([
12 |         transforms.GaussianBlur(11, sigma=(5, 5))
13 |     ]),
14 |     transforms.Compose([
15 |         transforms.GaussianBlur(11, sigma=(2, 2))
16 |     ]),
17 |     transforms.Compose([
18 |         transforms.GaussianBlur(5, sigma=(5, 5))
19 |     ]),
20 |     transforms.Compose([
21 |         transforms.GaussianBlur(5, sigma=(2, 2))
22 |     ]),
23 | ]
24 | 
25 | def get_random_bg(h, w, rand_solid=False):
26 | 
27 |     p = torch.rand(1)
28 | 
29 |     if p > 0.66666:
30 |         if rand_solid:
31 |             background = torch.vstack([
32 |                 torch.full( (1, h, w), torch.rand(1).item()),
33 |                 torch.full( (1, h, w), torch.rand(1).item()),
34 |                 torch.full( (1, h, w), torch.rand(1).item()),
35 |             ]).unsqueeze(0) + torch.rand(1, 3, h, w)
36 |
background = ((background - background.amin()) / (background.amax() - background.amin())) 37 | background = blurs[random.randint(0, 3)](background).permute(0, 2, 3, 1) 38 | else: 39 | background = blurs[random.randint(0, 3)]( torch.rand((1, 3, h, w)) ).permute(0, 2, 3, 1) 40 | elif p > 0.333333: 41 | size = random.randint(5, 10) 42 | background = torch.vstack([ 43 | torch.full( (1, size, size), torch.rand(1).item() / 2), 44 | torch.full( (1, size, size), torch.rand(1).item() / 2 ), 45 | torch.full( (1, size, size), torch.rand(1).item() / 2 ), 46 | ]).unsqueeze(0) 47 | 48 | second = torch.rand(3) 49 | 50 | background[:, 0, ::2, ::2] = second[0] 51 | background[:, 1, ::2, ::2] = second[1] 52 | background[:, 2, ::2, ::2] = second[2] 53 | 54 | background[:, 0, 1::2, 1::2] = second[0] 55 | background[:, 1, 1::2, 1::2] = second[1] 56 | background[:, 2, 1::2, 1::2] = second[2] 57 | 58 | background = blurs[random.randint(0, 3)]( resize(background, out_shape=(h, w)) ) 59 | 60 | background = background.permute(0, 2, 3, 1) 61 | 62 | else: 63 | background = torch.vstack([ 64 | torch.full( (1, h, w), torch.rand(1).item()), 65 | torch.full( (1, h, w), torch.rand(1).item()), 66 | torch.full( (1, h, w), torch.rand(1).item()), 67 | ]).unsqueeze(0).permute(0, 2, 3, 1) 68 | 69 | return background 70 | 71 | def cosine_sample(N : np.ndarray) -> np.ndarray: 72 | """ 73 | #---------------------------------------------------------------------------- 74 | # Cosine sample around a vector N 75 | #---------------------------------------------------------------------------- 76 | 77 | Copied from nvdiffmodelling 78 | 79 | """ 80 | # construct local frame 81 | N = N/np.linalg.norm(N) 82 | 83 | dx0 = np.array([0, N[2], -N[1]]) 84 | dx1 = np.array([-N[2], 0, N[0]]) 85 | 86 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 87 | dx = dx/np.linalg.norm(dx) 88 | dy = np.cross(N,dx) 89 | dy = dy/np.linalg.norm(dy) 90 | 91 | # cosine sampling in local frame 92 | phi = 2.0*np.pi*np.random.uniform() 93 | s = np.random.uniform() 94 | costheta = np.sqrt(s) 95 | sintheta = np.sqrt(1.0 - s) 96 | 97 | # cartesian vector in local space 98 | x = np.cos(phi)*sintheta 99 | y = np.sin(phi)*sintheta 100 | z = costheta 101 | 102 | # local to world 103 | return dx*x + dy*y + N*z 104 | 105 | def persp_proj(fov_x=45, ar=1, near=1.0, far=50.0): 106 | """ 107 | From https://github.com/rgl-epfl/large-steps-pytorch by @bathal1 (Baptiste Nicolet) 108 | 109 | Build a perspective projection matrix. 110 | Parameters 111 | ---------- 112 | fov_x : float 113 | Horizontal field of view (in degrees). 114 | ar : float 115 | Aspect ratio (w/h). 116 | near : float 117 | Depth of the near plane relative to the camera. 118 | far : float 119 | Depth of the far plane relative to the camera. 
120 | """ 121 | fov_rad = np.deg2rad(fov_x) 122 | 123 | tanhalffov = np.tan( (fov_rad / 2) ) 124 | max_y = tanhalffov * near 125 | min_y = -max_y 126 | max_x = max_y * ar 127 | min_x = -max_x 128 | 129 | z_sign = -1.0 130 | proj_mat = np.array([[0, 0, 0, 0], 131 | [0, 0, 0, 0], 132 | [0, 0, 0, 0], 133 | [0, 0, 0, 0]]) 134 | 135 | proj_mat[0, 0] = 2.0 * near / (max_x - min_x) 136 | proj_mat[1, 1] = 2.0 * near / (max_y - min_y) 137 | proj_mat[0, 2] = (max_x + min_x) / (max_x - min_x) 138 | proj_mat[1, 2] = (max_y + min_y) / (max_y - min_y) 139 | proj_mat[3, 2] = z_sign 140 | 141 | proj_mat[2, 2] = z_sign * far / (far - near) 142 | proj_mat[2, 3] = -(far * near) / (far - near) 143 | 144 | return proj_mat 145 | 146 | def get_camera_params(elev_angle, azim_angle, distance, resolution, fov=60, look_at=[0, 0, 0], up=[0, -1, 0]): 147 | 148 | elev = np.radians( elev_angle ) 149 | azim = np.radians( azim_angle ) 150 | 151 | # Generate random view 152 | cam_z = distance * np.cos(elev) * np.sin(azim) 153 | cam_y = distance * np.sin(elev) 154 | cam_x = distance * np.cos(elev) * np.cos(azim) 155 | 156 | modl = glm.mat4() 157 | view = glm.lookAt( 158 | glm.vec3(cam_x, cam_y, cam_z), 159 | glm.vec3(look_at[0], look_at[1], look_at[2]), 160 | glm.vec3(up[0], up[1], up[2]), 161 | ) 162 | 163 | a_mv = view * modl 164 | a_mv = np.array(a_mv.to_list()).T 165 | proj_mtx = persp_proj(fov) 166 | 167 | a_mvp = np.matmul(proj_mtx, a_mv).astype(np.float32)[None, ...] 168 | 169 | a_lightpos = np.linalg.inv(a_mv)[None, :3, 3] 170 | a_campos = a_lightpos 171 | 172 | return { 173 | 'mvp' : a_mvp, 174 | 'lightpos' : a_lightpos, 175 | 'campos' : a_campos, 176 | 'resolution' : [resolution, resolution], 177 | } 178 | 179 | # Returns a batch of camera parameters 180 | class CameraBatch(torch.utils.data.Dataset): 181 | def __init__( 182 | self, 183 | image_resolution, 184 | distances, 185 | azimuths, 186 | elevation_params, 187 | fovs, 188 | aug_loc, 189 | aug_light, 190 | aug_bkg, 191 | bs, 192 | look_at=[0, 0, 0], up=[0, -1, 0], 193 | rand_solid=False 194 | ): 195 | 196 | self.res = image_resolution 197 | 198 | self.dist_min = distances[0] 199 | self.dist_max = distances[1] 200 | 201 | self.azim_min = azimuths[0] 202 | self.azim_max = azimuths[1] 203 | 204 | self.fov_min = fovs[0] 205 | self.fov_max = fovs[1] 206 | 207 | self.elev_alpha = elevation_params[0] 208 | self.elev_beta = elevation_params[1] 209 | self.elev_max = elevation_params[2] 210 | 211 | self.aug_loc = aug_loc 212 | self.aug_light = aug_light 213 | self.aug_bkg = aug_bkg 214 | 215 | self.look_at = look_at 216 | self.up = up 217 | 218 | self.batch_size = bs 219 | self.rand_solid = rand_solid 220 | 221 | def __len__(self): 222 | return self.batch_size 223 | 224 | def __getitem__(self, index): 225 | 226 | elev = np.radians( np.random.beta( self.elev_alpha, self.elev_beta ) * self.elev_max ) 227 | azim = np.radians( np.random.uniform( self.azim_min, self.azim_max+1.0 ) ) 228 | dist = np.random.uniform( self.dist_min, self.dist_max ) 229 | fov = np.random.uniform( self.fov_min, self.fov_max ) 230 | 231 | proj_mtx = persp_proj(fov) 232 | 233 | # Generate random view 234 | cam_z = dist * np.cos(elev) * np.sin(azim) 235 | cam_y = dist * np.sin(elev) 236 | cam_x = dist * np.cos(elev) * np.cos(azim) 237 | 238 | if self.aug_loc: 239 | 240 | # Random offset 241 | limit = self.dist_min // 2 242 | rand_x = np.random.uniform( -limit, limit ) 243 | rand_y = np.random.uniform( -limit, limit ) 244 | 245 | modl = glm.translate(glm.mat4(), glm.vec3(rand_x, rand_y, 0)) 246 | 247 
| else: 248 | 249 | modl = glm.mat4() 250 | 251 | view = glm.lookAt( 252 | glm.vec3(cam_x, cam_y, cam_z), 253 | glm.vec3(self.look_at[0], self.look_at[1], self.look_at[2]), 254 | glm.vec3(self.up[0], self.up[1], self.up[2]), 255 | ) 256 | 257 | r_mv = view * modl 258 | r_mv = np.array(r_mv.to_list()).T 259 | 260 | mvp = np.matmul(proj_mtx, r_mv).astype(np.float32) 261 | campos = np.linalg.inv(r_mv)[:3, 3] 262 | 263 | if self.aug_light: 264 | lightpos = cosine_sample(campos)*dist 265 | else: 266 | lightpos = campos*dist 267 | 268 | if self.aug_bkg: 269 | bkgs = get_random_bg(self.res, self.res, self.rand_solid).squeeze(0) 270 | else: 271 | bkgs = torch.ones(self.res, self.res, 3) 272 | 273 | return { 274 | 'mvp': torch.from_numpy( mvp ).float(), 275 | 'lightpos': torch.from_numpy( lightpos ).float(), 276 | 'campos': torch.from_numpy( campos ).float(), 277 | 'bkgs': bkgs, 278 | 'azim': torch.tensor(azim).float(), 279 | 'elev': torch.tensor(elev).float(), 280 | } 281 | 282 | class ListCameraBatch(torch.utils.data.Dataset): 283 | def __init__(self, datasets, bs, weights=None): 284 | self.datasets = datasets 285 | self.batch_size = bs 286 | self.weights = weights 287 | def __len__(self): 288 | return self.batch_size 289 | def __getitem__(self, index): 290 | d = random.choices(self.datasets, weights=self.weights)[0] 291 | return d[index] -------------------------------------------------------------------------------- /utilities/clip_spatial.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import math 3 | import types 4 | import typing 5 | 6 | import clip 7 | import torch 8 | import torch.nn as nn 9 | from torchvision import models, transforms 10 | 11 | # code lifted from CLIPasso 12 | 13 | # For ViT 14 | class CLIPVisualEncoder(nn.Module): 15 | def __init__(self, model_name, stride, device): 16 | super().__init__() 17 | self.load_model(model_name, device) 18 | self.old_stride = self.model.conv1.stride[0] 19 | self.new_stride = stride 20 | self.patch_vit_resolution(stride) 21 | 22 | for i in range(12): # 12 resblocks in VIT visual transformer 23 | self.model.transformer.resblocks[i].register_forward_hook( 24 | self.make_hook(i)) 25 | 26 | 27 | def load_model(self, model_name, device): 28 | model, preprocess = clip.load(model_name, device=device) 29 | self.model = model.visual 30 | self.mean = torch.tensor(preprocess.transforms[-1].mean, device=device) 31 | self.std = torch.tensor(preprocess.transforms[-1].std, device=device) 32 | 33 | @staticmethod 34 | def _fix_pos_enc(patch_size: int, stride_hw: typing.Tuple[int, int]): 35 | def interpolate_pos_encoding(self, x, w, h): 36 | npatch = x.shape[1] - 1 37 | N = self.positional_embedding.shape[0] - 1 38 | if npatch == N and w == h: 39 | return self.positional_embedding 40 | class_pos_embed = self.positional_embedding[:1].type(x.dtype) 41 | patch_pos_embed = self.positional_embedding[1:].type(x.dtype) 42 | dim = x.shape[-1] 43 | w0 = 1 + (w - patch_size) // stride_hw[1] 44 | h0 = 1 + (h - patch_size) // stride_hw[0] 45 | assert (w0 * h0 == npatch) 46 | w0, h0 = w0 + 0.1, h0 + 0.1 47 | patch_pos_embed = torch.nn.functional.interpolate( 48 | patch_pos_embed.reshape(int(math.sqrt(N)), int(math.sqrt(N)), dim).permute(2, 0, 1).unsqueeze(0), 49 | scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)), 50 | mode='bicubic', 51 | align_corners=False, recompute_scale_factor=False, 52 | ).squeeze() 53 | assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1] 54 | 
patch_pos_embed = patch_pos_embed.permute(1, 2, 0).view(1, -1, dim)
55 |             return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)
56 |         return interpolate_pos_encoding
57 | 
58 | 
59 |     def patch_vit_resolution(self, stride):
60 |         patch_size = self.model.conv1.stride[0]
61 |         if stride == patch_size:
62 |             return
63 | 
64 |         stride = (stride, stride)
65 |         assert all([(patch_size // s_) * s_ == patch_size for s_ in stride])
66 |         self.model.conv1.stride = stride
67 |         self.model.interpolate_pos_encoding = types.MethodType(CLIPVisualEncoder._fix_pos_enc(patch_size, stride), self.model)
68 | 
69 |     @property
70 |     def dtype(self):
71 |         return self.model.conv1.weight.dtype
72 | 
73 |     def make_hook(self, name):
74 |         def hook(module, input, output):
75 |             if len(output.shape) == 3:
76 |                 self.featuremaps[name] = output.permute(
77 |                     1, 0, 2) # LND -> NLD bs, smth, 768
78 |             else:
79 |                 self.featuremaps[name] = output
80 | 
81 |         return hook
82 | 
83 |     def forward(self, x, preprocess=False):
84 |         self.featuremaps = collections.OrderedDict()
85 |         if preprocess:
86 |             x = (x - self.mean[None, :, None, None]) / self.std[None, :, None, None]
87 |         B, C, W, H = x.shape
88 |         x = self.model.conv1(x.type(self.dtype)) # shape = [*, width, grid, grid]
89 |         x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2]
90 |         x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width]
91 |         x = torch.cat([self.model.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width]
92 |         x = x + self.model.interpolate_pos_encoding(x, W, H)
93 |         x = self.model.ln_pre(x)
94 |         x = x.permute(1, 0, 2)
95 |         x = self.model.transformer(x)
96 |         # remove cls
97 |         featuremaps = [self.featuremaps[k].permute(0, 2, 1)[..., 1:] for k in range(12)]
98 | 
99 |         return featuremaps
100 | 
--------------------------------------------------------------------------------
/utilities/helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | Various helper functions
3 | 
4 | create_scene -> combines multiple nvdiffmodeling meshes into a single mesh with a mega texture
5 | """
6 | import sys
7 | import numpy as np
8 | import torch
9 | 
10 | from math import ceil
11 | 
12 | sys.path.append("../nvdiffmodeling")
13 | 
14 | import nvdiffmodeling.src.mesh as mesh
15 | import nvdiffmodeling.src.texture as texture
16 | import nvdiffmodeling.src.renderutils as ru
17 | 
18 | cosine_sim = torch.nn.CosineSimilarity()
19 | 
20 | def cosine_sum(features, targets):
21 |     return -cosine_sim(features, targets).sum()
22 | 
23 | def cosine_avg(features, targets):
24 |     return -cosine_sim(features, targets).mean()
25 | 
26 | def _merge_attr_idx(a, b, a_idx, b_idx, scale_a=1.0, scale_b=1.0, add_a=0.0, add_b=0.0):
27 |     if a is None and b is None:
28 |         return None, None
29 |     elif a is not None and b is None:
30 |         return (a*scale_a)+add_a, a_idx
31 |     elif a is None and b is not None:
32 |         return (b*scale_b)+add_b, b_idx
33 |     else:
34 |         return torch.cat(((a*scale_a)+add_a, (b*scale_b)+add_b), dim=0), torch.cat((a_idx, b_idx + a.shape[0]), dim=0)
35 | 
36 | def create_scene(meshes, sz=1024):
37 | 
38 |     # Need to comment and fix code
39 | 
40 |     scene = mesh.Mesh()
41 | 
42 |     tot = len(meshes) if len(meshes) % 2 == 0 else len(meshes)+1
43 | 
44 |     nx = 2
45 |     ny = ceil(tot / 2) if ceil(tot / 2) % 2 == 0 else ceil(tot / 2) + 1
46 | 
47 |     w = int(sz*ny)
48 |     h = int(sz*nx)
49 | 
50 |     dev = meshes[0].v_pos.device
51 | 
52 |     kd_atlas = torch.ones ( (1, w, h, 4)
).to(dev) 53 | ks_atlas = torch.zeros( (1, w, h, 3) ).to(dev) 54 | kn_atlas = torch.ones ( (1, w, h, 3) ).to(dev) 55 | 56 | for i, m in enumerate(meshes): 57 | v_pos, t_pos_idx = _merge_attr_idx(scene.v_pos, m.v_pos, scene.t_pos_idx, m.t_pos_idx) 58 | v_nrm, t_nrm_idx = _merge_attr_idx(scene.v_nrm, m.v_nrm, scene.t_nrm_idx, m.t_nrm_idx) 59 | v_tng, t_tng_idx = _merge_attr_idx(scene.v_tng, m.v_tng, scene.t_tng_idx, m.t_tng_idx) 60 | 61 | pos_x = i % nx 62 | pos_y = int(i / ny) 63 | 64 | sc_x = 1./nx 65 | sc_y = 1./ny 66 | 67 | v_tex, t_tex_idx = _merge_attr_idx( 68 | scene.v_tex, 69 | m.v_tex, 70 | scene.t_tex_idx, 71 | m.t_tex_idx, 72 | scale_a=1., 73 | scale_b=torch.tensor([sc_x, sc_y]).to(dev), 74 | add_a=0., 75 | add_b=torch.tensor([sc_x*pos_x, sc_y*pos_y]).to(dev) 76 | ) 77 | 78 | kd_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['kd'].data.shape[-1]] = m.material['kd'].data 79 | ks_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['ks'].data.shape[-1]] = m.material['ks'].data 80 | kn_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['normal'].data.shape[-1]] = m.material['normal'].data 81 | 82 | scene = mesh.Mesh( 83 | v_pos=v_pos, 84 | t_pos_idx=t_pos_idx, 85 | v_nrm=v_nrm, 86 | t_nrm_idx=t_nrm_idx, 87 | v_tng=v_tng, 88 | t_tng_idx=t_tng_idx, 89 | v_tex=v_tex, 90 | t_tex_idx=t_tex_idx, 91 | base=scene 92 | ) 93 | 94 | scene = mesh.Mesh( 95 | material={ 96 | 'bsdf': 'diffuse', 97 | 'kd': texture.Texture2D( 98 | kd_atlas 99 | ), 100 | 'ks': texture.Texture2D( 101 | ks_atlas 102 | ), 103 | 'normal': texture.Texture2D( 104 | kn_atlas 105 | ), 106 | }, 107 | base=scene # gets uvs etc from here 108 | ) 109 | 110 | return scene 111 | 112 | def get_vp_map(v_pos, mtx_in, resolution): 113 | device = v_pos.device 114 | with torch.no_grad(): 115 | vp_mtx = torch.tensor([ 116 | [resolution / 2, 0., 0., (resolution - 1) / 2], 117 | [0., resolution / 2, 0., (resolution - 1) / 2], 118 | [0., 0., 1., 0.], 119 | [0., 0., 0., 1.,] 120 | ], device=device) 121 | 122 | v_pos_clip = ru.xfm_points(v_pos[None, ...], mtx_in) 123 | v_pos_div = v_pos_clip / v_pos_clip[..., -1:] 124 | 125 | v_vp = (vp_mtx @ v_pos_div.transpose(1, 2)).transpose(1, 2)[..., :-1] 126 | 127 | # don't need manual z-buffer here since we're using the rast map to do occlusion 128 | if False: 129 | v_pix = v_vp[..., :-1].int().cpu().numpy() 130 | v_depth = v_vp[..., -1].cpu().numpy() 131 | 132 | # pix_v_map = -torch.ones(len(v_pix), resolution, resolution, dtype=int) 133 | pix_v_map = -np.ones((len(v_pix), resolution, resolution), dtype=int) 134 | # v_pix_map = resolution * torch.ones(len(v_pix), len(v_pos), 2, dtype=int) 135 | v_pix_map = resolution * np.ones_like(v_pix, dtype=int) 136 | # buffer = torch.ones_like(pix_v_map) / 0 137 | buffer = -np.ones_like(pix_v_map) / 0 138 | for i, vs in enumerate(v_pix): 139 | for j, (y, x) in enumerate(vs): 140 | if x < 0 or x > resolution - 1 or y < 0 or y > resolution - 1: 141 | continue 142 | else: 143 | if v_depth[i, j] > buffer[i, x, y]: 144 | buffer[i, x, y] = v_depth[i, j] 145 | if pix_v_map[i, x, y] != -1: 146 | v_pix_map[i, pix_v_map[i, x, y]] = np.array([resolution, resolution]) 147 | pix_v_map[i, x, y] = j 148 | v_pix_map[i, j] = np.array([x, y]) 149 | v_pix_map = torch.tensor(v_pix_map, device=device) 150 | v_pix_map = v_vp[..., :-1].int().flip([-1]) 151 | v_pix_map [(v_pix_map > resolution - 1) | (v_pix_map < 0)] = resolution 152 | return v_pix_map.long() 153 | 154 | 155 | 156 | 157 | 
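# A hedged usage sketch for create_scene above; `m1` and `m2` are placeholder
# names for nvdiffmodeling meshes already loaded on the same device. Both are
# packed into one mesh whose kd/ks/normal maps share a single texture atlas,
# with each mesh's UVs rescaled into its own atlas cell:
#
#   combined = create_scene([m1, m2], sz=1024)
#   # For two meshes, nx = ny = 2, so combined.material['kd'].data is a
#   # (1, 2048, 2048, 4) atlas of 1024x1024 cells.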
-------------------------------------------------------------------------------- /utilities/video.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper class to create and add images to video 3 | """ 4 | import imageio 5 | import numpy as np 6 | 7 | class Video(): 8 | def __init__(self, path, name='video_log.mp4', mode='I', fps=30, codec='libx264', bitrate='16M') -> None: 9 | 10 | if path[-1] != "/": 11 | path += "/" 12 | 13 | self.writer = imageio.get_writer(path+name, mode=mode, fps=fps, codec=codec, bitrate=bitrate) 14 | 15 | def ready_image(self, image, write_video=True): 16 | # assuming channels last - as renderer returns it 17 | if len(image.shape) == 4: 18 | image = image.squeeze(0)[..., :3].detach().cpu().numpy() 19 | else: 20 | image = image[..., :3].detach().cpu().numpy() 21 | 22 | image = np.clip(np.rint(image*255.0), 0, 255).astype(np.uint8) 23 | 24 | if write_video: 25 | self.writer.append_data(image) 26 | 27 | return image 28 | 29 | def close(self): 30 | self.writer.close() --------------------------------------------------------------------------------
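# A minimal usage sketch for the Video helper above; 'outputs' is a placeholder
# directory that must already exist. ready_image expects channels-last frames
# in [0, 1] (as the renderer returns them), and close() flushes the mp4:
#
#   import torch
#   video = Video('outputs', name='demo.mp4', fps=30)
#   for _ in range(60):
#       frame = torch.rand(1, 256, 256, 3)   # stand-in for a rendered frame
#       video.ready_image(frame)             # clipped to uint8 and appended
#   video.close()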