├── .gitignore ├── LICENSE ├── NeuralJacobianFields ├── MeshProcessor.py ├── PoissonSystem.py └── SourceMesh.py ├── README.md ├── example_config.yml ├── images ├── bad_orientation.png ├── good_orientation.png ├── hand_to_octopus.gif ├── planck_to_einstein.gif └── spot_to_giraffe.gif ├── loop.py ├── main.py ├── meshes ├── alien.obj ├── bird.obj ├── chair1.obj ├── chair2.obj ├── donkey.obj ├── eiffel_tower.obj ├── fish.obj ├── guitar.obj ├── hand.obj ├── max_planck.obj ├── shoe1.obj ├── shoe2.obj ├── spot.mtl ├── spot.obj ├── vase1.obj ├── vase2.obj └── vase3.obj ├── nvdiffmodeling ├── LICENSE.txt └── src │ ├── material.py │ ├── mesh.py │ ├── obj.py │ ├── regularizer.py │ ├── render.py │ ├── renderutils │ ├── __init__.py │ ├── bsdf.py │ ├── c_src │ │ ├── bsdf.cu │ │ ├── bsdf.h │ │ ├── common.cpp │ │ ├── common.h │ │ ├── loss.cu │ │ ├── loss.h │ │ ├── mesh.cu │ │ ├── mesh.h │ │ ├── normal.cu │ │ ├── normal.h │ │ ├── tensor.h │ │ ├── torch_bindings.cpp │ │ ├── vec3f.h │ │ └── vec4f.h │ ├── loss.py │ ├── ops.py │ └── tests │ │ ├── test_bsdf.py │ │ ├── test_loss.py │ │ ├── test_mesh.py │ │ └── test_perf.py │ ├── texture.py │ └── util.py ├── requirements.txt └── utilities ├── camera.py ├── clip_spatial.py ├── helpers.py ├── resize_right.py └── video.py /.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AnonGit11 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NeuralJacobianFields/SourceMesh.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | import torch 5 | import igl 6 | from . 
import MeshProcessor 7 | WKS_DIM = MeshProcessor.WKS_DIM 8 | WKS_FACTOR = 1000 9 | import numpy as np 10 | import sys 11 | import random 12 | import time 13 | class SourceMesh: 14 | ''' 15 | datastructure for the source mesh to be mapped 16 | ''' 17 | 18 | def __init__(self, source_ind, source_dir, extra_source_fields, 19 | random_scale, ttype, use_wks=False, random_centering=False, 20 | cpuonly=False): 21 | self.__use_wks = use_wks 22 | self.source_ind = source_ind 23 | self.source_dir = source_dir 24 | self.centroids_and_normals = None 25 | self.center_source = True 26 | self.poisson = None 27 | self.__source_global_translation_to_original = 0 28 | self.__extra_keys = extra_source_fields 29 | self.__loaded_data = {} 30 | self.__ttype = ttype 31 | self.__random_scale = random_scale 32 | self.random_centering = random_centering 33 | self.source_mesh_centroid = None 34 | self.mesh_processor = None 35 | self.cpuonly = cpuonly 36 | 37 | def get_vertices(self): 38 | return self.source_vertices 39 | 40 | def get_global_translation_to_original(self): 41 | return self.__source_global_translation_to_original 42 | 43 | def vertices_from_jacobians(self, d): 44 | return self.poisson.solve_poisson(d) 45 | 46 | def jacobians_from_vertices(self, v): 47 | return self.poisson.jacobians_from_vertices(v) 48 | 49 | def restrict_jacobians(self, J): 50 | return self.poisson.restrict_jacobians(J) 51 | 52 | def get_loaded_data(self, key: str): 53 | 54 | return self.__loaded_data.get(key) 55 | 56 | def get_source_triangles(self): 57 | # if self.__source_triangles is None: 58 | # self.__source_triangles = np.load(os.path.join(self.source_dir, 'faces.npy')) 59 | return self.mesh_processor.get_faces() 60 | 61 | def to(self, device): 62 | self.poisson = self.poisson.to(device) 63 | self.centroids_and_normals = self.centroids_and_normals.to(device) 64 | for key in self.__loaded_data.keys(): 65 | self.__loaded_data[key] = self.__loaded_data[key].to(device) 66 | return self 67 | 68 | def __init_from_mesh_data(self): 69 | assert self.mesh_processor is not None 70 | self.mesh_processor.prepare_differential_operators_for_use(self.__ttype) #call 1 71 | self.source_vertices = torch.from_numpy(self.mesh_processor.get_vertices()).type( 72 | self.__ttype) 73 | if self.__random_scale != 1: 74 | print("Diff ops and WKS need to be multiplied accordingly. 
Not implemented for now")
 75 |             sys.exit()
 76 |         self.source_vertices *= self.__random_scale
 77 | 
 78 |         bb = igl.bounding_box(self.source_vertices.numpy())[0]
 79 |         diag = igl.bounding_box_diagonal(self.source_vertices.numpy())
 80 | 
 81 |         # self.source_mesh_centroid = torch.mean(self.source_vertices, axis=0)
 82 |         self.source_mesh_centroid = (bb[0] + bb[-1])/2
 83 |         if self.random_centering:
 84 |             # centering augmentation
 85 |             self.source_mesh_centroid = self.source_mesh_centroid + [(2*random.random() - 1)*diag*0.2, (2*random.random() - 1)*diag*0.2, (2*random.random() - 1)*diag*0.2]
 86 |         # self.source_mesh_centroid = (bb[0] + bb[-1])/2 - np.array([-0.00033245, -0.2910367 , 0.02100835])
 87 | 
 88 |         # Load input to NJF MLP
 89 |         # start = time.time()
 90 |         centroids = self.mesh_processor.get_centroids()
 91 |         centroid_points_and_normals = centroids.points_and_normals
 92 |         if self.__use_wks:
 93 |             wks = WKS_FACTOR * centroids.wks
 94 |             centroid_points_and_normals = numpy.hstack((centroid_points_and_normals, wks))
 95 |         self.centroids_and_normals = torch.from_numpy(
 96 |             centroid_points_and_normals).type(self.__ttype)
 97 |         if self.center_source:
 98 |             c = self.source_mesh_centroid
 99 |             self.centroids_and_normals[:, 0:3] -= c
100 |             self.source_vertices -= c
101 |             self.__source_global_translation_to_original = c
102 |         self.poisson = self.mesh_processor.diff_ops.poisson_solver
103 | 
104 | 
105 |         # Essentially here we load pointnet data and apply the same preprocessing
106 |         for key in self.__extra_keys:
107 |             data = self.mesh_processor.get_data(key)
108 |             # if data is None:  # not found in mesh data so try loading from disk
109 |             #     data = np.load(os.path.join(self.source_dir, key + ".npy"))
110 |             data = torch.from_numpy(data)
111 |             if key == 'samples':
112 |                 if self.center_source:
113 |                     data -= self.get_mesh_centroid()
114 |                 scale = self.__random_scale
115 |                 data *= scale
116 |                 data = data.unsqueeze(0).type(self.__ttype)
117 | 
118 |             self.__loaded_data[key] = data
119 |         # print("Elapsed load source mesh ", time.time() - start)
120 | 
121 |     def load(self, source_v=None, source_f=None):
122 |         # mesh_data = SourceMeshData.SourceMeshData.meshprocessor_from_file(self.source_dir)
123 |         if source_v is not None and source_f is not None:
124 |             self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_array(source_v, source_f, self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
125 |         else:
126 |             if os.path.isdir(self.source_dir):
127 |                 self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_directory(self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
128 |             else:
129 |                 self.mesh_processor = MeshProcessor.MeshProcessor.meshprocessor_from_file(self.source_dir, self.__ttype, cpuonly=self.cpuonly, load_wks_samples=self.__use_wks, load_wks_centroids=self.__use_wks)
130 |         self.__init_from_mesh_data()
131 | 
132 |     def get_point_dim(self):
133 |         return self.centroids_and_normals.shape[1]
134 | 
135 |     def get_centroids_and_normals(self):
136 |         return self.centroids_and_normals
137 | 
138 |     def get_mesh_centroid(self):
139 |         return self.source_mesh_centroid
140 | 
141 |     def pin_memory(self):
142 |         # self.poisson.pin_memory()
143 |         # self.centroids_and_normals.pin_memory()
144 |         # self.source_vertices.pin_memory()
145 |         # for key in self.__loaded_data.keys():
146 |         #     self.__loaded_data[key].pin_memory()
147 |         return self
148 | 
149 | 
150 | 
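# Usage sketch (illustrative only, not part of the original pipeline): SourceMesh
# supports a jacobian round-trip -- load a mesh, extract per-face jacobians,
# modify them, then Poisson-solve back to vertex positions. The constructor call
# mirrors the one in loop.py; the path here is an assumption for the example.
#
#   src = SourceMesh(0, 'outputs/tmp/mesh.obj', {}, 1, ttype=torch.float)
#   src.load()                                        # builds the MeshProcessor and Poisson solver
#   v = src.get_vertices().unsqueeze(0)               # (1, V, 3) batched vertices
#   J = src.jacobians_from_vertices(v)                # (1, F, 3, 3) per-face jacobians
#   v_rec = src.vertices_from_jacobians(J).squeeze()  # least-squares solve back to vertices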
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TextDeformer [[Project Page](https://threedle.github.io/TextDeformer/)]
 2 | [![arXiv](https://img.shields.io/badge/arXiv-TextDeformer-b31b1b.svg)](https://arxiv.org/abs/2304.13348)
 3 | 
 4 | ![alt](images/planck_to_einstein.gif)
 5 | ## Installation
 6 | 
 7 |     conda create -y -n TextDeformer python=3.9
 8 |     conda activate TextDeformer
 9 |     pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
10 |     conda install -y -c conda-forge igl
11 |     pip install -r requirements.txt
12 | 
13 | ## Usage
14 | **NOTE:** This repository **requires** a GPU to run.
15 | 
16 | ### Run examples
17 | ``main.py`` is the primary script to use. You may pass arguments using the ``--config`` flag, which takes the path to a ``.yml`` file. See ``example_config.yml`` for an example. Alternatively, you may pass command line arguments manually; these override the values provided by the config file. Example usage:
18 | 
19 |     # Use all arguments provided by the example config
20 |     python main.py --config example_config.yml
21 | 
22 |     # Change the optimized mesh to hand.obj, and change the base and target text prompts
23 |     python main.py --config example_config.yml --mesh meshes/hand.obj --text_prompt 'an octopus' --base_text_prompt 'a hand'
24 | 
25 |     # Now, increase the batch size, learning rate, and the training resolution
26 |     python main.py --config example_config.yml --mesh meshes/hand.obj --text_prompt 'an octopus' \
27 |     --base_text_prompt 'a hand' --batch_size 50 --lr 0.005 --train_res 1024
28 | 
29 | ### Outputs
30 | Outputs are saved to the path specified in the run configuration, which is ``./outputs`` by default. The output folder contains several subfolders: ``images`` contains intermittently saved samples of the rendered images passed to CLIP, ``logs`` contains tensorboard logs of the optimization process, and ``mesh_best_clip``, ``mesh_best_total``, and ``mesh_final`` contain the optimized mesh at the best CLIP score, the best total loss, and the final epoch, respectively. The configuration file is also saved at ``config.yml``, and a video of the optimization process is saved at ``video_log.mp4``.
31 | 
32 | ### Common bugs
33 | #### Mesh Orientation
34 | Due to how the camera angles are sampled, the input mesh may need to be re-oriented to perform well. You can check your mesh's orientation by inspecting ``images/epoch_0.png``.
35 | ![alt](images/bad_orientation.png)
36 | 
37 | *The Eiffel Tower mesh is oriented poorly, so CLIP sees the mesh rendered from strange viewpoints*
38 | 
39 | ![alt](images/good_orientation.png)
40 | 
41 | *Here, we re-orient the mesh so that CLIP sees the side profile of the tower*
42 | 
43 | #### Jacobian temp files
44 | The ``NeuralJacobianFields`` code in this repository creates several temporary files in ``outputs/tmp``. If these temporary files already exist, the code reads the existing files instead of overwriting them. This can cause stale results if you run multiple examples with the same output path, intending to overwrite the output folder. A workaround is shown below.
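To force a clean rebuild, one simple workaround (assuming the default ``./outputs`` output path) is to delete the cache before re-running; ``loop.py`` recreates the directory and regenerates the temporary mesh and precomputation files on the next run:

    # Force the cached Jacobian/Poisson precomputation to be rebuilt
    rm -rf outputs/tmp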
45 | 
46 | ## Citation
47 | ```
48 | @InProceedings{Gao_2023_SIGGRAPH,
49 |     author = {Gao, William and Aigerman, Noam and Groueix, Thibault and Kim, Vladimir and Hanocka, Rana},
50 |     title = {TextDeformer: Geometry Manipulation using Text Guidance},
51 |     booktitle = {ACM Transactions on Graphics (SIGGRAPH)},
52 |     year = {2023},
53 | }
54 | ```
--------------------------------------------------------------------------------
/example_config.yml:
--------------------------------------------------------------------------------
 1 | output_path: ./outputs
 2 | gpu: 0
 3 | seed: 99
 4 | 
 5 | # CLIP-related
 6 | text_prompt: a giraffe
 7 | base_text_prompt: a cow
 8 | clip_model: ViT-B/32
 9 | consistency_clip_model: ViT-B/32
10 | consistency_vit_stride: 8
11 | consistency_vit_layer: 11
12 | 
13 | # Mesh
14 | mesh: ./meshes/spot.obj
15 | retriangulate: 0
16 | 
17 | # Render settings
18 | bsdf: diffuse
19 | 
20 | # Hyper-parameters
21 | lr: 0.0025
22 | epochs: 2500
23 | clip_weight: 1.0
24 | delta_clip_weight: 1.0
25 | regularize_jacobians_weight: 0.5
26 | consistency_loss_weight: 0.5
27 | consistency_elev_filter: 30
28 | consistency_azim_filter: 20
29 | batch_size: 25
30 | train_res: 512
31 | resize_method: cubic
32 | 
33 | # Camera parameters
34 | fov_min: 30.0
35 | fov_max: 90.0
36 | dist_min: 2.5
37 | dist_max: 3.5
38 | light_power: 5.0
39 | elev_alpha: 1.0
40 | elev_beta: 5.0
41 | elev_max: 60.0
42 | azim_min: 0.0
43 | azim_max: 360.0
44 | aug_loc: 1
45 | aug_light: 1
46 | aug_bkg: 1
47 | adapt_dist: 1
48 | 
49 | log_interval: 5
50 | log_interval_im: 150
51 | log_elev: 30.0
52 | log_fov: 60.0
53 | log_dist: 3.0
54 | log_res: 512
55 | log_light_power: 3.0
--------------------------------------------------------------------------------
/images/bad_orientation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/bad_orientation.png
--------------------------------------------------------------------------------
/images/good_orientation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/good_orientation.png
--------------------------------------------------------------------------------
/images/hand_to_octopus.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/hand_to_octopus.gif
--------------------------------------------------------------------------------
/images/planck_to_einstein.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/planck_to_einstein.gif
--------------------------------------------------------------------------------
/images/spot_to_giraffe.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/threedle/TextDeformer/f5302c66c0e5cbbd2fde3ad2aac2aec8f842924c/images/spot_to_giraffe.gif
--------------------------------------------------------------------------------
/loop.py:
--------------------------------------------------------------------------------
  1 | import clip
  2 | import kornia
  3 | import os
  4 | import pathlib
  5 | import pymeshlab
  6 | import shutil
  7 | import torch
  8 | import 
torchvision
  9 | import logging
 10 | import yaml
 11 | 
 12 | import numpy as np
 13 | import nvdiffrast.torch as dr
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | from easydict import EasyDict
 17 | 
 18 | from NeuralJacobianFields import SourceMesh
 19 | 
 20 | from nvdiffmodeling.src import obj
 21 | from nvdiffmodeling.src import util
 22 | from nvdiffmodeling.src import mesh
 23 | from nvdiffmodeling.src import render
 24 | from nvdiffmodeling.src import texture
 25 | from nvdiffmodeling.src import regularizer
 26 | 
 27 | from PIL import Image
 28 | from torch.utils.tensorboard import SummaryWriter
 29 | from tqdm import tqdm
 30 | 
 31 | from utilities.video import Video
 32 | from utilities.helpers import cosine_avg, create_scene, get_vp_map
 33 | from utilities.camera import CameraBatch, get_camera_params
 34 | from utilities.clip_spatial import CLIPVisualEncoder
 35 | from utilities.resize_right import resize, cubic, linear, lanczos2, lanczos3
 36 | 
 37 | def loop(cfg):
 38 |     output_path = pathlib.Path(cfg['output_path'])
 39 |     os.makedirs(output_path, exist_ok=True)
 40 |     with open(output_path / 'config.yml', 'w') as f:
 41 |         yaml.dump(cfg, f, default_flow_style=False)
 42 |     cfg = EasyDict(cfg)
 43 | 
 44 |     print(f'Output directory {cfg.output_path} created')
 45 | 
 46 |     device = torch.device(f'cuda:{cfg.gpu}')
 47 |     torch.cuda.set_device(device)
 48 | 
 49 |     print('Loading CLIP Models')
 50 |     model, _ = clip.load(cfg.clip_model, device=device)
 51 |     fe = CLIPVisualEncoder(cfg.consistency_clip_model, cfg.consistency_vit_stride, device)
 52 | 
 53 |     clip_mean = torch.tensor([0.48145466, 0.45782750, 0.40821073], device=device)
 54 |     clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device)
 55 | 
 56 |     # output video
 57 |     video = Video(cfg.output_path)
 58 | 
 59 |     # GL Context
 60 |     glctx = dr.RasterizeGLContext()
 61 | 
 62 |     print(f'Target text prompt is {cfg.text_prompt}')
 63 |     print(f'Base text prompt is {cfg.base_text_prompt}')
 64 |     with torch.no_grad():
 65 |         text_embeds = clip.tokenize(cfg.text_prompt).to(device)
 66 |         base_text_embeds = clip.tokenize(cfg.base_text_prompt).to(device)
 67 |         text_embeds = model.encode_text(text_embeds).detach()
 68 |         target_text_embeds = text_embeds.clone() / text_embeds.norm(dim=1, keepdim=True)
 69 | 
 70 |         delta_text_embeds = text_embeds - model.encode_text(base_text_embeds)
 71 |         delta_text_embeds = delta_text_embeds / delta_text_embeds.norm(dim=1, keepdim=True)
 72 | 
 73 |     os.makedirs(output_path / 'tmp', exist_ok=True)
 74 |     ms = pymeshlab.MeshSet()
 75 |     ms.load_new_mesh(cfg.mesh)
 76 | 
 77 |     if cfg.retriangulate:
 78 |         print('Retriangulating shape')
 79 |         ms.meshing_isotropic_explicit_remeshing()
 80 | 
 81 |     if not ms.current_mesh().has_wedge_tex_coord():
 82 |         # some arbitrarily high number
 83 |         ms.compute_texcoord_parametrization_triangle_trivial_per_wedge(textdim=10000)
 84 | 
 85 |     ms.save_current_mesh(str(output_path / 'tmp' / 'mesh.obj'))
 86 | 
 87 |     load_mesh = obj.load_obj(str(output_path / 'tmp' / 'mesh.obj'))
 88 |     load_mesh = mesh.unit_size(load_mesh)
 89 | 
 90 |     ms.add_mesh(pymeshlab.Mesh(vertex_matrix=load_mesh.v_pos.cpu().numpy(), face_matrix=load_mesh.t_pos_idx.cpu().numpy()))
 91 |     ms.save_current_mesh(str(output_path / 'tmp' / 'mesh.obj'), save_vertex_color=False)
 92 | 
 93 |     # TODO: Need these for rendering even if we don't optimize textures
 94 |     texture_map = texture.create_trainable(np.random.uniform(size=[512]*2 + [3], low=0.0, high=1.0), [512]*2, True)
 95 |     normal_map = texture.create_trainable(np.array([0, 0, 1]), [512]*2, True)
 96 |     specular_map = 
texture.create_trainable(np.array([0, 0, 0]), [512]*2, True) 97 | 98 | load_mesh = mesh.Mesh( 99 | material={ 100 | 'bsdf': cfg.bsdf, 101 | 'kd': texture_map, 102 | 'ks': specular_map, 103 | 'normal': normal_map, 104 | }, 105 | base=load_mesh # Get UVs from original loaded mesh 106 | ) 107 | 108 | jacobian_source = SourceMesh.SourceMesh(0, str(output_path / 'tmp' / 'mesh.obj'), {}, 1, ttype=torch.float) 109 | if len(list((output_path / 'tmp').glob('*.npz'))) > 0: 110 | logging.warn(f'Using existing Jacobian .npz files in {str(output_path)}/tmp/ ! Please check if this is intentional.') 111 | jacobian_source.load() 112 | jacobian_source.to(device) 113 | 114 | with torch.no_grad(): 115 | gt_jacobians = jacobian_source.jacobians_from_vertices(load_mesh.v_pos.unsqueeze(0)) 116 | gt_jacobians.requires_grad_(True) 117 | 118 | optimizer = torch.optim.Adam([gt_jacobians], lr=cfg.lr) 119 | cams_data = CameraBatch( 120 | cfg.train_res, 121 | [cfg.dist_min, cfg.dist_max], 122 | [cfg.azim_min, cfg.azim_max], 123 | [cfg.elev_alpha, cfg.elev_beta, cfg.elev_max], 124 | [cfg.fov_min, cfg.fov_max], 125 | cfg.aug_loc, 126 | cfg.aug_light, 127 | cfg.aug_bkg, 128 | cfg.batch_size, 129 | rand_solid=True 130 | ) 131 | cams = torch.utils.data.DataLoader(cams_data, cfg.batch_size, num_workers=0, pin_memory=True) 132 | best_losses = {'CLIP': np.inf, 'total': np.inf} 133 | 134 | for out_type in ['final', 'best_clip', 'best_total']: 135 | os.makedirs(output_path / f'mesh_{out_type}', exist_ok=True) 136 | os.makedirs(output_path / 'images', exist_ok=True) 137 | logger = SummaryWriter(str(output_path / 'logs')) 138 | 139 | rot_ang = 0.0 140 | t_loop = tqdm(range(cfg.epochs), leave=False) 141 | 142 | if cfg.resize_method == 'cubic': 143 | resize_method = cubic 144 | elif cfg.resize_method == 'linear': 145 | resize_method = linear 146 | elif cfg.resize_method == 'lanczos2': 147 | resize_method = lanczos2 148 | elif cfg.resize_method == 'lanczos3': 149 | resize_method = lanczos3 150 | 151 | for it in t_loop: 152 | 153 | # updated vertices from jacobians 154 | n_vert = jacobian_source.vertices_from_jacobians(gt_jacobians).squeeze() 155 | 156 | # TODO: More texture code required to make it work ... 
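        # The trainable kd / ks / normal maps are low-pass filtered with a 7x7
        # gaussian blur each iteration before being packed into the render material.
        # Note that the blurred kd below is computed but unused in the final mesh:
        # the material takes kd_notex, a constant 0.5 gray, presumably so the CLIP
        # losses respond to the deformed geometry rather than to texture detail.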
157 | ready_texture = texture.Texture2D( 158 | kornia.filters.gaussian_blur2d( 159 | load_mesh.material['kd'].data.permute(0, 3, 1, 2), 160 | kernel_size=(7, 7), 161 | sigma=(3, 3), 162 | ).permute(0, 2, 3, 1).contiguous() 163 | ) 164 | 165 | kd_notex = texture.Texture2D(torch.full_like(ready_texture.data, 0.5)) 166 | 167 | ready_specular = texture.Texture2D( 168 | kornia.filters.gaussian_blur2d( 169 | load_mesh.material['ks'].data.permute(0, 3, 1, 2), 170 | kernel_size=(7, 7), 171 | sigma=(3, 3), 172 | ).permute(0, 2, 3, 1).contiguous() 173 | ) 174 | 175 | ready_normal = texture.Texture2D( 176 | kornia.filters.gaussian_blur2d( 177 | load_mesh.material['normal'].data.permute(0, 3, 1, 2), 178 | kernel_size=(7, 7), 179 | sigma=(3, 3), 180 | ).permute(0, 2, 3, 1).contiguous() 181 | ) 182 | 183 | # Final mesh 184 | m = mesh.Mesh( 185 | n_vert, 186 | load_mesh.t_pos_idx, 187 | material={ 188 | 'bsdf': cfg.bsdf, 189 | 'kd': kd_notex, 190 | 'ks': ready_specular, 191 | 'normal': ready_normal, 192 | }, 193 | base=load_mesh # gets uvs etc from here 194 | ) 195 | 196 | render_mesh = create_scene([m.eval()], sz=512) 197 | if it == 0: 198 | base_mesh = render_mesh.clone() 199 | base_mesh = mesh.auto_normals(base_mesh) 200 | base_mesh = mesh.compute_tangents(base_mesh) 201 | render_mesh = mesh.auto_normals(render_mesh) 202 | render_mesh = mesh.compute_tangents(render_mesh) 203 | 204 | # Logging mesh 205 | if it % cfg.log_interval == 0: 206 | with torch.no_grad(): 207 | params = get_camera_params( 208 | cfg.log_elev, 209 | rot_ang, 210 | cfg.log_dist, 211 | cfg.log_res, 212 | cfg.log_fov, 213 | ) 214 | rot_ang += 1 215 | log_mesh = mesh.unit_size(render_mesh.eval(params)) 216 | log_image = render.render_mesh( 217 | glctx, 218 | log_mesh, 219 | params['mvp'], 220 | params['campos'], 221 | params['lightpos'], 222 | cfg.log_light_power, 223 | cfg.log_res, 224 | 1, 225 | background=torch.ones(1, cfg.log_res, cfg.log_res, 3).to(device) 226 | ) 227 | 228 | log_image = video.ready_image(log_image) 229 | logger.add_mesh('predicted_mesh', vertices=log_mesh.v_pos.unsqueeze(0), faces=log_mesh.t_pos_idx.unsqueeze(0), global_step=it) 230 | 231 | if cfg.adapt_dist and it > 0: 232 | with torch.no_grad(): 233 | v_pos = m.v_pos.clone() 234 | vmin = v_pos.amin(dim=0) 235 | vmax = v_pos.amax(dim=0) 236 | v_pos -= (vmin + vmax) / 2 237 | mult = torch.cat([v_pos.amin(dim=0), v_pos.amax(dim=0)]).abs().amax().cpu() 238 | cams.dataset.dist_min = cfg.dist_min * mult 239 | cams.dataset.dist_max = cfg.dist_max * mult 240 | 241 | params_camera = next(iter(cams)) 242 | for key in params_camera: 243 | params_camera[key] = params_camera[key].to(device) 244 | 245 | final_mesh = render_mesh.eval(params_camera) 246 | 247 | train_render = render.render_mesh( 248 | glctx, 249 | final_mesh, 250 | params_camera['mvp'], 251 | params_camera['campos'], 252 | params_camera['lightpos'], 253 | cfg.light_power, 254 | cfg.train_res, 255 | spp=1, 256 | num_layers=1, 257 | msaa=False, 258 | background=params_camera['bkgs'] 259 | ).permute(0, 3, 1, 2) 260 | train_render = resize(train_render, out_shape=(224, 224), interp_method=resize_method) 261 | 262 | train_rast_map = render.render_mesh( 263 | glctx, 264 | final_mesh, 265 | params_camera['mvp'], 266 | params_camera['campos'], 267 | params_camera['lightpos'], 268 | cfg.light_power, 269 | cfg.train_res, 270 | spp=1, 271 | num_layers=1, 272 | msaa=False, 273 | background=params_camera['bkgs'], 274 | return_rast_map=True 275 | ) 276 | 277 | if it == 0: 278 | params_camera = next(iter(cams)) 279 | for 
key in params_camera: 280 | params_camera[key] = params_camera[key].to(device) 281 | base_render = render.render_mesh( 282 | glctx, 283 | base_mesh.eval(params_camera), 284 | params_camera['mvp'], 285 | params_camera['campos'], 286 | params_camera['lightpos'], 287 | cfg.light_power, 288 | cfg.train_res, 289 | spp=1, 290 | num_layers=1, 291 | msaa=False, 292 | background=params_camera['bkgs'], 293 | ).permute(0, 3, 1, 2) 294 | base_render = resize(base_render, out_shape=(224, 224), interp_method=resize_method) 295 | 296 | if it % cfg.log_interval_im == 0: 297 | log_idx = torch.randperm(cfg.batch_size)[:5] 298 | s_log = train_render[log_idx, :, :, :] 299 | s_log = torchvision.utils.make_grid(s_log) 300 | ndarr = s_log.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy() 301 | im = Image.fromarray(ndarr) 302 | im.save(str(output_path / 'images' / f'epoch_{it}.png')) 303 | 304 | obj.write_obj( 305 | str(output_path / 'mesh_final'), 306 | m.eval() 307 | ) 308 | 309 | optimizer.zero_grad() 310 | 311 | # CLIP similarity losses 312 | normalized_clip_render = (train_render - clip_mean[None, :, None, None]) / clip_std[None, :, None, None] 313 | image_embeds = model.encode_image( 314 | normalized_clip_render 315 | ) 316 | with torch.no_grad(): 317 | normalized_base_render = (base_render - clip_mean[None, :, None, None]) / clip_std[None, :, None, None] 318 | base_embeds = model.encode_image(normalized_base_render) 319 | 320 | orig_image_embeds = image_embeds.clone() / image_embeds.norm(dim=1, keepdim=True) 321 | delta_image_embeds = image_embeds - base_embeds 322 | delta_image_embeds = delta_image_embeds / delta_image_embeds.norm(dim=1, keepdim=True) 323 | 324 | clip_loss = cosine_avg(orig_image_embeds, target_text_embeds) 325 | delta_clip_loss = cosine_avg(delta_image_embeds, delta_text_embeds) 326 | logger.add_scalar('clip_loss', clip_loss, global_step=it) 327 | logger.add_scalar('delta_clip_loss', delta_clip_loss, global_step=it) 328 | 329 | # Jacobian regularization 330 | r_loss = (((gt_jacobians) - torch.eye(3, 3, device=device)) ** 2).mean() 331 | logger.add_scalar('jacobian_regularization', r_loss, global_step=it) 332 | 333 | # Consistency loss 334 | # Get mapping from vertex to pixels 335 | curr_vp_map = get_vp_map(final_mesh.v_pos, params_camera['mvp'], 224) 336 | for idx, rast_faces in enumerate(train_rast_map[:, :, :, 3].view(cfg.batch_size, -1)): 337 | u_faces = rast_faces.unique().long()[1:] - 1 338 | t = torch.arange(len(final_mesh.v_pos), device=device) 339 | u_ret = torch.cat([t, final_mesh.t_pos_idx[u_faces].flatten()]).unique(return_counts=True) 340 | non_verts = u_ret[0][u_ret[1] < 2] 341 | curr_vp_map[idx][non_verts] = torch.tensor([224, 224], device=device) 342 | 343 | # Get mapping from vertex to patch 344 | med = (fe.old_stride - 1) / 2 345 | curr_vp_map[curr_vp_map < med] = med 346 | curr_vp_map[(curr_vp_map > 224 - fe.old_stride) & (curr_vp_map < 224)] = 223 - med 347 | curr_patch_map = ((curr_vp_map - med) / fe.new_stride).round() 348 | flat_patch_map = curr_patch_map[..., 0] * (((224 - fe.old_stride) / fe.new_stride) + 1) + curr_patch_map[..., 1] 349 | 350 | # Deep features 351 | patch_feats = fe(normalized_clip_render) 352 | flat_patch_map[flat_patch_map > patch_feats[0].shape[-1] - 1] = patch_feats[0].shape[-1] 353 | flat_patch_map = flat_patch_map.long()[:, None, :].repeat(1, patch_feats[0].shape[1], 1) 354 | 355 | deep_feats = patch_feats[cfg.consistency_vit_layer] 356 | deep_feats = torch.nn.functional.pad(deep_feats, (0, 1)) 357 | 
deep_feats = torch.gather(deep_feats, dim=2, index=flat_patch_map) 358 | deep_feats = torch.nn.functional.normalize(deep_feats, dim=1, eps=1e-6) 359 | 360 | elev_d = torch.cdist(params_camera['elev'].unsqueeze(1), params_camera['elev'].unsqueeze(1)).abs() < torch.deg2rad(torch.tensor(cfg.consistency_elev_filter)) 361 | azim_d = torch.cdist(params_camera['azim'].unsqueeze(1), params_camera['azim'].unsqueeze(1)).abs() < torch.deg2rad(torch.tensor(cfg.consistency_azim_filter)) 362 | 363 | cosines = torch.einsum('ijk, lkj -> ilk', deep_feats, deep_feats.permute(0, 2, 1)) 364 | cosines = (cosines * azim_d.unsqueeze(-1) * elev_d.unsqueeze(-1)).permute(2, 0, 1).triu(1) 365 | consistency_loss = cosines[cosines != 0].mean() 366 | logger.add_scalar('consistency_loss', consistency_loss, global_step=it) 367 | 368 | total_loss = cfg.clip_weight * clip_loss + cfg.delta_clip_weight * delta_clip_loss + \ 369 | cfg.regularize_jacobians_weight * r_loss - cfg.consistency_loss_weight * consistency_loss 370 | logger.add_scalar('total_loss', total_loss, global_step=it) 371 | 372 | if best_losses['total'] > total_loss: 373 | best_losses['total'] = total_loss.detach() 374 | obj.write_obj( 375 | str(output_path / 'mesh_best_total'), 376 | m.eval() 377 | ) 378 | if best_losses['CLIP'] > clip_loss: 379 | best_losses['CLIP'] = clip_loss.detach() 380 | obj.write_obj( 381 | str(output_path / 'mesh_best_clip'), 382 | m.eval() 383 | ) 384 | 385 | total_loss.backward() 386 | optimizer.step() 387 | t_loop.set_description(f'CLIP Loss = {clip_loss.item()}, Total Loss = {total_loss.item()}') 388 | 389 | video.close() 390 | obj.write_obj( 391 | str(output_path / 'mesh_final'), 392 | m.eval() 393 | ) 394 | 395 | return 396 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import torch 4 | import random 5 | import argparse 6 | import numpy as np 7 | 8 | from loop import loop 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--config', help='Path to config file', type=str, default='./example_config.yml') 13 | parser.add_argument('--output_path', help='Output directory (will be created)', type=str, default=argparse.SUPPRESS) 14 | parser.add_argument('--gpu', help='GPU index', type=int, default=argparse.SUPPRESS) 15 | parser.add_argument('--seed', help='Random seed', type=int, default=argparse.SUPPRESS) 16 | 17 | # CLIP-related 18 | parser.add_argument('--text_prompt', help='Target text prompt', type=str, default=argparse.SUPPRESS) 19 | parser.add_argument('--base_text_prompt', help='Base text prompt describing input mesh', type=str, default=argparse.SUPPRESS) 20 | parser.add_argument('--clip_model', help='CLIP Model for text comparison', type=str, default=argparse.SUPPRESS) 21 | parser.add_argument('--consistency_clip_model', help='CLIP Model for consistency', type=str, default=argparse.SUPPRESS) 22 | parser.add_argument('--consistency_vit_stride', help='New stride for ViT patch interpolation', type=int, default=argparse.SUPPRESS) 23 | parser.add_argument('--consistency_vit_layer', help='Which layer to take ViT patch features from (0-11)', type=int, default=argparse.SUPPRESS) 24 | 25 | # Mesh 26 | parser.add_argument('--mesh', help='Path to input mesh', type=str, default=argparse.SUPPRESS) 27 | parser.add_argument('--retriangulate', help='Use isotropic remeshing', type=int, default=argparse.SUPPRESS, choices=[0, 1]) 28 | 29 | # Render 
settings
30 |     parser.add_argument('--bsdf', help='Render technique', type=str, default=argparse.SUPPRESS, choices=['diffuse', 'pbr'])
31 | 
32 |     # Hyper-parameters
33 |     parser.add_argument('--lr', help='Learning rate', type=float, default=argparse.SUPPRESS)
34 |     parser.add_argument('--epochs', help='Number of optimization steps', type=int, default=argparse.SUPPRESS)
35 |     parser.add_argument('--clip_weight', help='Weight for CLIP loss', type=float, default=argparse.SUPPRESS)
36 |     parser.add_argument('--delta_clip_weight', help='Weight for delta-CLIP loss', type=float, default=argparse.SUPPRESS)
37 |     parser.add_argument('--regularize_jacobians_weight', help='Weight for jacobian regularization', type=float, default=argparse.SUPPRESS)
38 |     parser.add_argument('--consistency_loss_weight', help='Weight for viewpoint consistency penalty', type=float, default=argparse.SUPPRESS)
39 |     parser.add_argument('--consistency_elev_filter', help='Elev. angle threshold for filtering out pairs of viewpoints for consistency loss', type=float, default=argparse.SUPPRESS)
40 |     parser.add_argument('--consistency_azim_filter', help='Azim. angle threshold for filtering out pairs of viewpoints for consistency loss', type=float, default=argparse.SUPPRESS)
41 |     parser.add_argument('--batch_size', help='Number of images rendered at the same time', type=int, default=argparse.SUPPRESS)
42 |     parser.add_argument('--train_res', help='Resolution of render before downscaling to CLIP size', type=int, default=argparse.SUPPRESS)
43 |     parser.add_argument('--resize_method', help='Image downsampling/upsampling method', type=str, default=argparse.SUPPRESS, choices=['cubic', 'linear', 'lanczos2', 'lanczos3'])
44 |     ## Camera Parameters ##
45 |     parser.add_argument('--fov_min', help='Minimum camera field of view angle during renders', type=float, default=argparse.SUPPRESS)
46 |     parser.add_argument('--fov_max', help='Maximum camera field of view angle during renders', type=float, default=argparse.SUPPRESS)
47 |     parser.add_argument('--dist_min', help='Minimum distance of camera from mesh during renders', type=float, default=argparse.SUPPRESS)
48 |     parser.add_argument('--dist_max', help='Maximum distance of camera from mesh during renders', type=float, default=argparse.SUPPRESS)
49 |     parser.add_argument('--light_power', help='Light intensity', type=float, default=argparse.SUPPRESS)
50 |     parser.add_argument('--elev_alpha', help='Alpha parameter for Beta distribution for elevation sampling', type=float, default=argparse.SUPPRESS)
51 |     parser.add_argument('--elev_beta', help='Beta parameter for Beta distribution for elevation sampling', type=float, default=argparse.SUPPRESS)
52 |     parser.add_argument('--elev_max', help='Maximum elevation angle in degrees', type=float, default=argparse.SUPPRESS)
53 |     parser.add_argument('--azim_min', help='Minimum azimuth angle in degrees', type=float, default=argparse.SUPPRESS)
54 |     parser.add_argument('--azim_max', help='Maximum azimuth angle in degrees', type=float, default=argparse.SUPPRESS)
55 |     parser.add_argument('--aug_loc', help='Offset mesh from center of image?', type=int, default=argparse.SUPPRESS, choices=[0, 1])
56 |     parser.add_argument('--aug_light', help='Augment the direction of light around the camera', type=int, default=argparse.SUPPRESS, choices=[0, 1])
57 |     parser.add_argument('--aug_bkg', help='Augment the background', type=int, default=argparse.SUPPRESS, choices=[0, 1])
58 |     parser.add_argument('--adapt_dist', help='Adjust camera distance to account for scale of shape', type=int, 
default=argparse.SUPPRESS, choices=[0, 1]) 59 | 60 | # Logging 61 | parser.add_argument('--log_interval', help='Interval for logging, every X epochs', type=int, default=argparse.SUPPRESS) 62 | parser.add_argument('--log_interval_im', help='Interval for logging renders image, every X epochs', type=int, default=argparse.SUPPRESS) 63 | parser.add_argument('--log_elev', help='Logging elevation angle', type=float, default=argparse.SUPPRESS) 64 | parser.add_argument('--log_fov', help='Logging field of view', type=float, default=argparse.SUPPRESS) 65 | parser.add_argument('--log_dist', help='Logging distance from object', type=float, default=argparse.SUPPRESS) 66 | parser.add_argument('--log_res', help='Logging render resolution', type=int, default=argparse.SUPPRESS) 67 | parser.add_argument('--log_light_power', help='Light intensity for logging', type=float, default=argparse.SUPPRESS) 68 | 69 | args = parser.parse_args() 70 | if args.config is not None: 71 | with open(args.config, 'r') as f: 72 | try: 73 | cfg = yaml.safe_load(f) 74 | except yaml.YAMLError as e: 75 | print(e) 76 | 77 | for key in vars(args): 78 | cfg[key] = vars(args)[key] 79 | 80 | print(yaml.dump(cfg, default_flow_style=False)) 81 | random.seed(cfg['seed']) 82 | os.environ['PYTHONHASHSEED'] = str(cfg['seed']) 83 | np.random.seed(cfg['seed']) 84 | torch.manual_seed(cfg['seed']) 85 | torch.cuda.manual_seed(cfg['seed']) 86 | torch.backends.cudnn.deterministic = True 87 | 88 | loop(cfg) 89 | print('Done') 90 | 91 | if __name__ == '__main__': 92 | main() 93 | 94 | -------------------------------------------------------------------------------- /meshes/spot.mtl: -------------------------------------------------------------------------------- 1 | 2 | # Blender MTL File: 'None' 3 | # Material Count: 1 4 | 5 | newmtl Default_OBJ 6 | Ns 250.000000 7 | Ka 1.000000 1.000000 1.000000 8 | Kd 0.800000 0.800000 0.800000 9 | Ks 0.500000 0.500000 0.500000 10 | Ke 0.000000 0.000000 0.000000 11 | Ni 1.450000 12 | d 1.000000 13 | illum 2 14 | -------------------------------------------------------------------------------- /nvdiffmodeling/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, NVIDIA Corporation. All rights reserved. 2 | 3 | 4 | Nvidia Source Code License (1-Way Commercial) 5 | 6 | ======================================================================= 7 | 8 | 1. Definitions 9 | 10 | "Licensor" means any person or entity that distributes its Work. 11 | 12 | "Software" means the original work of authorship made available under 13 | this License. 14 | 15 | "Work" means the Software and any additions to or derivative works of 16 | the Software that are made available under this License. 17 | 18 | The terms "reproduce," "reproduction," "derivative works," and 19 | "distribution" have the meaning as provided under U.S. copyright law; 20 | provided, however, that for the purposes of this License, derivative 21 | works shall not include works that remain separable from, or merely 22 | link (or bind by name) to the interfaces of, the Work. 23 | 24 | Works, including the Software, are "made available" under this License 25 | by including in or with the Work either (a) a copyright notice 26 | referencing the applicability of this License to the Work, or (b) a 27 | copy of this License. 28 | 29 | 2. License Grants 30 | 31 | 2.1 Copyright Grant. 
Subject to the terms and conditions of this 32 | License, each Licensor grants to you a perpetual, worldwide, 33 | non-exclusive, royalty-free, copyright license to reproduce, 34 | prepare derivative works of, publicly display, publicly perform, 35 | sublicense and distribute its Work and any resulting derivative 36 | works in any form. 37 | 38 | 3. Limitations 39 | 40 | 3.1 Redistribution. You may reproduce or distribute the Work only 41 | if (a) you do so under this License, (b) you include a complete 42 | copy of this License with your distribution, and (c) you retain 43 | without modification any copyright, patent, trademark, or 44 | attribution notices that are present in the Work. 45 | 46 | 3.2 Derivative Works. You may specify that additional or different 47 | terms apply to the use, reproduction, and distribution of your 48 | derivative works of the Work ("Your Terms") only if (a) Your Terms 49 | provide that the use limitation in Section 3.3 applies to your 50 | derivative works, and (b) you identify the specific derivative 51 | works that are subject to Your Terms. Notwithstanding Your Terms, 52 | this License (including the redistribution requirements in Section 53 | 3.1) will continue to apply to the Work itself. 54 | 55 | 3.3 Use Limitation. The Work and any derivative works thereof only 56 | may be used or intended for use non-commercially. The Work or 57 | derivative works thereof may be used or intended for use by Nvidia 58 | or its affiliates commercially or non-commercially. As used herein, 59 | "non-commercially" means for research or evaluation purposes only 60 | and not for any direct or indirect monetary gain. 61 | 62 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim 63 | against any Licensor (including any claim, cross-claim or 64 | counterclaim in a lawsuit) to enforce any patents that you allege 65 | are infringed by any Work, then your rights under this License from 66 | such Licensor (including the grant in Section 2.1) will terminate 67 | immediately. 68 | 69 | 3.5 Trademarks. This License does not grant any rights to use any 70 | Licensor's or its affiliates' names, logos, or trademarks, except 71 | as necessary to reproduce the notices described in this License. 72 | 73 | 3.6 Termination. If you violate any term of this License, then your 74 | rights under this License (including the grant in Section 2.1) will 75 | terminate immediately. 76 | 77 | 4. Disclaimer of Warranty. 78 | 79 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY 80 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF 81 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR 82 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER 83 | THIS LICENSE. 84 | 85 | 5. Limitation of Liability. 86 | 87 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL 88 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE 89 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, 90 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF 91 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK 92 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, 93 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER 94 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF 95 | THE POSSIBILITY OF SUCH DAMAGES. 
96 | 97 | ======================================================================= 98 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/material.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | 13 | from . import util 14 | from . import texture 15 | from . import mesh 16 | 17 | ###################################################################################### 18 | # .mtl material format loading / storing 19 | ###################################################################################### 20 | 21 | def load_mtl(fn, clear_ks=True): 22 | import re 23 | mtl_path = os.path.dirname(fn) 24 | 25 | # Read file 26 | with open(fn) as f: 27 | lines = f.readlines() 28 | 29 | # Parse materials 30 | materials = [] 31 | for line in lines: 32 | split_line = re.split(' +|\t+|\n+', line.strip()) 33 | prefix = split_line[0].lower() 34 | data = split_line[1:] 35 | if 'newmtl' in prefix: 36 | material = {'name' : data[0]} 37 | materials += [material] 38 | elif materials: 39 | if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix: 40 | material[prefix] = data[0] 41 | else: 42 | material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda') 43 | 44 | # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps. So replace constants with 1x1 maps 45 | for mat in materials: 46 | if not 'bsdf' in mat: 47 | mat['bsdf'] = 'pbr' 48 | 49 | if 'map_kd' in mat: 50 | mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd'])) 51 | else: 52 | mat['kd'] = texture.Texture2D(mat['kd']) 53 | 54 | if 'map_ks' in mat: 55 | mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3) 56 | else: 57 | mat['ks'] = texture.Texture2D(mat['ks']) 58 | 59 | if 'bump' in mat: 60 | mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3) 61 | 62 | # Convert Kd from sRGB to linear RGB 63 | mat['kd'] = texture.srgb_to_rgb(mat['kd']) 64 | 65 | if clear_ks: 66 | # Override ORM occlusion (red) channel by zeros. 
We hijack this channel
 67 |         for mip in mat['ks'].getMips():
 68 |             mip[..., 0] = 0.0
 69 | 
 70 |     return materials
 71 | 
 72 | def save_mtl(fn, material):
 73 |     folder = os.path.dirname(fn)
 74 |     with open(fn, "w") as f:
 75 |         f.write('newmtl defaultMat\n')
 76 |         if material is not None:
 77 |             f.write('bsdf %s\n' % material['bsdf'])
 78 |             f.write('map_kd texture_kd.png\n')
 79 |             texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
 80 |             f.write('map_ks texture_ks.png\n')
 81 |             texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
 82 |             f.write('bump texture_n.png\n')
 83 |             texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(x+1)*0.5)
 84 |         else:
 85 |             f.write('Kd 1 1 1\n')
 86 |             f.write('Ks 0 0 0\n')
 87 |             f.write('Ka 0 0 0\n')
 88 |             f.write('Tf 1 1 1\n')
 89 |             f.write('Ni 1\n')
 90 |             f.write('Ns 0\n')
 91 | 
 92 | ######################################################################################
 93 | # Merge multiple materials into a single uber-material
 94 | ######################################################################################
 95 | 
 96 | def _upscale_replicate(x, full_res):
 97 |     x = x.permute(0, 3, 1, 2)
 98 |     x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
 99 |     return x.permute(0, 2, 3, 1).contiguous()
100 | 
101 | def merge_materials(materials, texcoords, tfaces, mfaces):
102 |     assert len(materials) > 0
103 |     for mat in materials:
104 |         assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
105 |         assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"
106 | 
107 |     uber_material = {
108 |         'name' : 'uber_material',
109 |         'bsdf' : materials[0]['bsdf'],
110 |     }
111 | 
112 |     textures = ['kd', 'ks', 'normal']
113 | 
114 |     # Find maximum texture resolution across all materials and textures
115 |     max_res = None
116 |     for mat in materials:
117 |         for tex in textures:
118 |             tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
119 |             max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res
120 | 
121 |     # Compute size of compound texture and round up to nearest PoT
122 |     full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)
123 | 
124 |     # Normalize texture resolution across all materials & combine into a single large texture
125 |     for tex in textures:
126 |         if tex in materials[0]:
127 |             tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2) # Lay out all textures horizontally, NHWC so dim2 is x
128 |             tex_data = _upscale_replicate(tex_data, full_res)
129 |             uber_material[tex] = texture.Texture2D(tex_data)
130 | 
131 |     # Compute scaling values for used / unused texture area
132 |     s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]
133 | 
134 |     # Recompute texture coordinates to coincide with new composite texture
135 |     new_tverts = {}
136 |     new_tverts_data = []
137 |     for fi in range(len(tfaces)):
138 |         matIdx = mfaces[fi]
139 |         for vi in range(3):
140 |             ti = tfaces[fi][vi]
141 |             if not (ti in new_tverts):
142 |                 new_tverts[ti] = {}
143 |             if not (matIdx in new_tverts[ti]): # create new vertex
144 |                 new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]]) # Offset texture coordinate (x direction) by material id & scale to local space. 
Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here 145 | new_tverts[ti][matIdx] = len(new_tverts_data) - 1 146 | tfaces[fi][vi] = new_tverts[ti][matIdx] # reindex vertex 147 | 148 | return uber_material, new_tverts_data, tfaces 149 | 150 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/obj.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | 13 | from . import util 14 | from . import texture 15 | from . import mesh 16 | from . import material 17 | 18 | ###################################################################################### 19 | # Utility functions 20 | ###################################################################################### 21 | 22 | def _write_weights(folder, mesh): 23 | if mesh.v_weights is not None: 24 | file = os.path.join(folder, 'mesh.weights') 25 | np.save(file, mesh.v_weights.detach().cpu().numpy()) 26 | 27 | def _write_bones(folder, mesh): 28 | if mesh.bone_mtx is not None: 29 | file = os.path.join(folder, 'mesh.bones') 30 | np.save(file, mesh.bone_mtx.detach().cpu().numpy()) 31 | 32 | def _find_mat(materials, name): 33 | for mat in materials: 34 | if mat['name'] == name: 35 | return mat 36 | return materials[0] # Materials 0 is the default 37 | 38 | ###################################################################################### 39 | # Create mesh object from objfile 40 | ###################################################################################### 41 | 42 | def load_obj(filename, clear_ks=True, mtl_override=None): 43 | obj_path = os.path.dirname(filename) 44 | 45 | # Read entire file 46 | with open(filename) as f: 47 | lines = f.readlines() 48 | 49 | # Load materials 50 | all_materials = [ 51 | { 52 | 'name' : '_default_mat', 53 | 'bsdf' : 'falcor', 54 | 'kd' : texture.Texture2D(torch.tensor([0.5, 0.5, 0.5], dtype=torch.float32, device='cuda')), 55 | 'ks' : texture.Texture2D(torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32, device='cuda')) 56 | } 57 | ] 58 | if mtl_override is None: 59 | for line in lines: 60 | if len(line.split()) == 0: 61 | continue 62 | if line.split()[0] == 'mtllib': 63 | all_materials += material.load_mtl(os.path.join(obj_path, line.split()[1]), clear_ks) # Read in entire material library 64 | else: 65 | all_materials += material.load_mtl(mtl_override) 66 | 67 | # load vertices 68 | vertices, texcoords, normals = [], [], [] 69 | for line in lines: 70 | if len(line.split()) == 0: 71 | continue 72 | 73 | prefix = line.split()[0].lower() 74 | if prefix == 'v': 75 | vertices.append([float(v) for v in line.split()[1:]][:3]) 76 | elif prefix == 'vt': 77 | val = [float(v) for v in line.split()[1:]] 78 | texcoords.append([val[0], 1.0 - val[1]]) 79 | elif prefix == 'vn': 80 | normals.append([float(v) for v in line.split()[1:]]) 81 | 82 | # load faces 83 | activeMatIdx = None 84 | used_materials = [] 85 | faces, tfaces, nfaces, mfaces = [], [], [], [] 86 | for line in lines: 87 | if 
len(line.split()) == 0: 88 | continue 89 | 90 | prefix = line.split()[0].lower() 91 | if prefix == 'usemtl': # Track used materials 92 | mat = _find_mat(all_materials, line.split()[1]) 93 | if not mat in used_materials: 94 | used_materials.append(mat) 95 | activeMatIdx = used_materials.index(mat) 96 | elif prefix == 'f': # Parse face 97 | vs = line.split()[1:] 98 | nv = len(vs) 99 | vv = vs[0].split('/') 100 | v0 = int(vv[0]) - 1 101 | if len(vv) > 1: 102 | t0 = int(vv[1]) - 1 if vv[1] != "" else -1 103 | n0 = int(vv[2]) - 1 if vv[2] != "" else -1 104 | else: 105 | t0 = -1 106 | n0 = -1 107 | for i in range(nv - 2): # Triangulate polygons 108 | vv = vs[i + 1].split('/') 109 | v1 = int(vv[0]) - 1 110 | if len(vv) > 1: 111 | t1 = int(vv[1]) - 1 if vv[1] != "" else -1 112 | n1 = int(vv[2]) - 1 if vv[2] != "" else -1 113 | else: 114 | t1 = -1 115 | n1 = -1 116 | vv = vs[i + 2].split('/') 117 | v2 = int(vv[0]) - 1 118 | if len(vv) > 1: 119 | t2 = int(vv[1]) - 1 if vv[1] != "" else -1 120 | n2 = int(vv[2]) - 1 if vv[2] != "" else -1 121 | else: 122 | t2 = -1 123 | n2 = -1 124 | mfaces.append(activeMatIdx) 125 | faces.append([v0, v1, v2]) 126 | tfaces.append([t0, t1, t2]) 127 | nfaces.append([n0, n1, n2]) 128 | assert len(tfaces) == len(faces) and len(nfaces) == len (faces) 129 | 130 | # Create an "uber" material by combining all textures into a larger texture 131 | if len(used_materials) > 1: 132 | uber_material, texcoords, tfaces = material.merge_materials(used_materials, texcoords, tfaces, mfaces) 133 | elif len(used_materials) == 1: 134 | uber_material = used_materials[0] 135 | else: 136 | uber_material = None 137 | 138 | vertices = torch.tensor(vertices, dtype=torch.float32, device='cuda') 139 | texcoords = torch.tensor(texcoords, dtype=torch.float32, device='cuda') if len(texcoords) > 0 else None 140 | normals = torch.tensor(normals, dtype=torch.float32, device='cuda') if len(normals) > 0 else None 141 | 142 | faces = torch.tensor(faces, dtype=torch.int64, device='cuda') 143 | tfaces = torch.tensor(tfaces, dtype=torch.int64, device='cuda') if texcoords is not None else None 144 | nfaces = torch.tensor(nfaces, dtype=torch.int64, device='cuda') if normals is not None else None 145 | 146 | # Read weights and bones if available 147 | try: 148 | v_weights = torch.tensor(np.load(os.path.splitext(filename)[0] + ".weights.npy"), dtype=torch.float32, device='cuda') 149 | bone_mtx = torch.tensor(np.load(os.path.splitext(filename)[0] + ".bones.npy"), dtype=torch.float32, device='cuda') 150 | except: 151 | v_weights, bone_mtx = None, None 152 | 153 | return mesh.Mesh(vertices, faces, normals, nfaces, texcoords, tfaces, v_weights=v_weights, bone_mtx=bone_mtx, material=uber_material) 154 | 155 | ###################################################################################### 156 | # Save mesh object to objfile 157 | ###################################################################################### 158 | 159 | def write_obj(folder, mesh, verbose=True): 160 | obj_file = os.path.join(folder, 'mesh.obj') 161 | if verbose: 162 | print("Writing mesh: ", obj_file) 163 | with open(obj_file, "w") as f: 164 | f.write("mtllib mesh.mtl\n") 165 | f.write("g default\n") 166 | 167 | v_pos = mesh.v_pos.detach().cpu().numpy() if mesh.v_pos is not None else None 168 | v_nrm = mesh.v_nrm.detach().cpu().numpy() if mesh.v_nrm is not None else None 169 | v_tex = mesh.v_tex.detach().cpu().numpy() if mesh.v_tex is not None else None 170 | 171 | t_pos_idx = mesh.t_pos_idx.detach().cpu().numpy() if mesh.t_pos_idx is not 
None else None
172 | t_nrm_idx = mesh.t_nrm_idx.detach().cpu().numpy() if mesh.t_nrm_idx is not None else None
173 | t_tex_idx = mesh.t_tex_idx.detach().cpu().numpy() if mesh.t_tex_idx is not None else None
174 | if verbose:
175 | print(" writing %d vertices" % len(v_pos))
176 | for v in v_pos:
177 | f.write('v {} {} {} \n'.format(v[0], v[1], v[2]))
178 |
179 | if v_tex is not None:
180 | if verbose:
181 | print(" writing %d texcoords" % len(v_tex))
182 | assert(len(t_pos_idx) == len(t_tex_idx))
183 | for v in v_tex:
184 | f.write('vt {} {} \n'.format(v[0], 1.0 - v[1]))
185 |
186 | if v_nrm is not None:
187 | if verbose:
188 | print(" writing %d normals" % len(v_nrm))
189 | assert(len(t_pos_idx) == len(t_nrm_idx))
190 | for v in v_nrm:
191 | f.write('vn {} {} {}\n'.format(v[0], v[1], v[2]))
192 |
193 | # faces
194 | f.write("s 1 \n")
195 | f.write("g pMesh1\n")
196 | f.write("usemtl defaultMat\n")
197 |
198 | # Write faces
199 | if verbose:
200 | print(" writing %d faces" % len(t_pos_idx))
201 | for i in range(len(t_pos_idx)):
202 | f.write("f ")
203 | for j in range(3):
204 | f.write(' %s/%s/%s' % (str(t_pos_idx[i][j]+1), '' if v_tex is None else str(t_tex_idx[i][j]+1), '' if v_nrm is None else str(t_nrm_idx[i][j]+1)))
205 | f.write("\n")
206 |
207 | mtl_file = os.path.join(folder, 'mesh.mtl')
208 | if verbose:
209 | print("Writing material: ", mtl_file)
210 | material.save_mtl(mtl_file, mesh.material)
211 |
212 | _write_weights(folder, mesh)
213 | _write_bones(folder, mesh)
214 | if verbose:
215 | print("Done exporting mesh")
216 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/regularizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | import os
10 | import numpy as np
11 | import torch
12 |
13 | from . import util
14 | from . import texture
15 |
16 | ######################################################################################
17 | # Computes the average edge length of a mesh.
18 | # Rough estimate of the tessellation of a mesh. Can be used e.g.
to clamp gradients
19 | ######################################################################################
20 | def avg_edge_length(opt_mesh):
21 | with torch.no_grad():
22 | opt_mesh = opt_mesh.eval()
23 | nVerts = opt_mesh.v_pos.shape[0]
24 | t_pos_idx = opt_mesh.t_pos_idx.detach().cpu().numpy()
25 |
26 | # Find unique edges
27 | ix_i = []
28 | ix_j = []
29 | edge_verts = {}
30 | for tri in t_pos_idx:
31 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
32 | if (i1, i0) not in edge_verts.keys():
33 | edge_verts[(i0, i1)] = True
34 | ix_i += [i0]
35 | ix_j += [i1]
36 |
37 | # Setup torch tensors
38 | ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
39 | ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
40 |
41 | # Gather edge vertex pairs
42 | x_i = opt_mesh.v_pos[ix_i, :]
43 | x_j = opt_mesh.v_pos[ix_j, :]
44 |
45 | # Compute edge lengths (Euclidean norm per edge, not per component)
46 | term = torch.sqrt(torch.sum((x_j - x_i)**2, dim=-1))
47 |
48 | # Compute avg edge length
49 | return (torch.sum(term) / len(x_i)).item()
50 |
51 | ######################################################################################
52 | # Edge length regularizer
53 | ######################################################################################
54 | def edge_length_regularizer(mesh):
55 | class mesh_op_edge_length_regularizer:
56 | def __init__(self, mesh):
57 | self.mesh = mesh
58 |
59 | mesh = mesh.eval()
60 | nVerts = mesh.v_pos.shape[0]
61 | t_pos_idx = mesh.t_pos_idx.detach().cpu().numpy()
62 |
63 | # Find unique edges
64 | ix_i = []
65 | ix_j = []
66 | edge_verts = {}
67 | for tri in t_pos_idx:
68 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
69 | if (i1, i0) not in edge_verts.keys():
70 | edge_verts[(i0, i1)] = True
71 | ix_i += [i0]
72 | ix_j += [i1]
73 |
74 | # Setup torch tensors
75 | self.ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
76 | self.ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
77 |
78 | def eval(self, params={}):
79 | mesh = self.mesh.eval(params)
80 |
81 | # Gather edge vertex pairs
82 | x_i = mesh.v_pos[self.ix_i, :]
83 | x_j = mesh.v_pos[self.ix_j, :]
84 |
85 | # Compute edge lengths (the epsilon keeps the sqrt differentiable at zero)
86 | term = torch.sqrt(torch.sum((x_j - x_i)**2, dim=-1) + 1e-20)
87 |
88 | # Penalize the variance of the edge lengths
89 | return torch.var(term)
90 |
91 | return mesh_op_edge_length_regularizer(mesh)
92 |
93 | ######################################################################################
94 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun).
95 | # https://mgarland.org/class/geom04/material/smoothing.pdf
96 | ######################################################################################
97 | def laplace_regularizer_const(opt_mesh, base_mesh=None):
98 | class mesh_op_laplace_regularizer_const:
99 | def __init__(self, opt_mesh, base_mesh):
100 | self.inputs = [opt_mesh, base_mesh]
101 |
102 | opt_mesh = opt_mesh.eval()
103 | self.nVerts = opt_mesh.v_pos.shape[0]
104 | t_pos_idx = opt_mesh.t_pos_idx.detach().cpu().numpy()
105 |
106 | # Build vertex neighbor rings
107 | vtx_n = [[] for _ in range(self.nVerts)]
108 | for tri in t_pos_idx:
109 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
110 | vtx_n[i0].append(i1)
111 |
112 | # Collect index/weight pairs to compute each Laplacian vector for each vertex.
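# For the uniform weights used here, a vertex i with m one-ring neighbors j
# gets w_ij = 1/m, so the umbrella operator below evaluates to
# L(x_i) = (1/m) * sum_j (x_j - x_i), i.e. the offset from x_i to the
# centroid of its neighbors.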
113 | # Similar notation to https://mgarland.org/class/geom04/material/smoothing.pdf
114 | ix_j, ix_i, w_ij = [], [], []
115 | for i in range(self.nVerts):
116 | m = len(vtx_n[i])
117 | ix_i += [i] * m
118 | ix_j += vtx_n[i]
119 | w_ij += [1.0 / m] * m
120 |
121 | # Setup torch tensors
122 | self.ix_i = torch.tensor(ix_i, dtype=torch.int64, device='cuda')
123 | self.ix_j = torch.tensor(ix_j, dtype=torch.int64, device='cuda')
124 | self.w_ij = torch.tensor(w_ij, dtype=torch.float32, device='cuda')[:, None]
125 |
126 | def eval(self, params={}):
127 | opt_mesh = self.inputs[0].eval(params)
128 | base_mesh = self.inputs[1].eval(params) if self.inputs[1] is not None else None
129 |
130 | # Regularize differences to the base mesh if given, else absolute positions (see paper)
131 | if base_mesh is not None:
132 | v_pos = opt_mesh.v_pos - base_mesh.v_pos
133 | else:
134 | v_pos = opt_mesh.v_pos
135 |
136 | # Gather edge vertex pairs
137 | x_i = v_pos[self.ix_i, :]
138 | x_j = v_pos[self.ix_j, :]
139 |
140 | # Compute Laplacian differences: (x_j - x_i) * w_ij
141 | term = (x_j - x_i) * self.w_ij
142 |
143 | # Sum everything
144 | term = util.segment_sum(term, self.ix_i)
145 |
146 | return torch.mean(term**2)
147 |
148 | return mesh_op_laplace_regularizer_const(opt_mesh, base_mesh)
149 |
150 | ######################################################################################
151 | # Curvature-based regularizer
152 | ######################################################################################
153 | def face_normal_regularizer(opt_mesh):
154 | class mesh_op_face_normal_regularizer:
155 | def __init__(self, opt_mesh):
156 | self.input = opt_mesh
157 |
158 | imesh = opt_mesh.eval()
159 | self.nVerts = imesh.v_pos.shape[0]
160 | t_pos_idx = imesh.t_pos_idx.detach().cpu().numpy()
161 |
162 | # Generate edge lists
163 | edge_tris = {}
164 | for tri_idx, tri in enumerate(t_pos_idx):
165 | for (i0, i1) in [(tri[0], tri[1]), (tri[1], tri[2]), (tri[2], tri[0])]:
166 | if (i1, i0) in edge_tris.keys():
167 | edge_tris[(i1, i0)] += [tri_idx]
168 | else:
169 | edge_tris[(i0, i1)] = [tri_idx]
170 |
171 | # Get all good edges with 2 incident triangles
172 | shared_edge_idx = []
173 | for edge in edge_tris.values():
174 | if len(edge) == 2:
175 | shared_edge_idx += [edge]
176 | self.edge_tri_idx = torch.tensor(shared_edge_idx, dtype=torch.int64, device='cuda')
177 |
178 | def eval(self, params={}):
179 | imesh = self.input.eval(params)
180 |
181 | # Compute face normals
182 | v0 = imesh.v_pos[imesh.t_pos_idx[:, 0], :]
183 | v1 = imesh.v_pos[imesh.t_pos_idx[:, 1], :]
184 | v2 = imesh.v_pos[imesh.t_pos_idx[:, 2], :]
185 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
186 |
187 | # Fetch normals for both faces sharing an edge
188 | n0 = face_normals[self.edge_tri_idx[:, 0], :]
189 | n1 = face_normals[self.edge_tri_idx[:, 1], :]
190 |
191 | # Compute error metric based on normal difference
192 | term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
193 | term = (1.0 - term) * 0.5
194 |
195 | return torch.mean(torch.abs(term))
196 |
197 | return mesh_op_face_normal_regularizer(opt_mesh)
198 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/render.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn.functional as F 12 | import nvdiffrast.torch as dr 13 | 14 | from . import util 15 | from . import mesh 16 | from . import renderutils as ru 17 | 18 | # ============================================================================================== 19 | # Helper functions 20 | # ============================================================================================== 21 | def interpolate(attr, rast, attr_idx, rast_db=None): 22 | return dr.interpolate(attr.contiguous(), rast, attr_idx, rast_db=rast_db, diff_attrs=None if rast_db is None else 'all') 23 | 24 | # ============================================================================================== 25 | # pixel shader 26 | # ============================================================================================== 27 | def shade( 28 | gb_pos, 29 | gb_geometric_normal, 30 | gb_normal, 31 | gb_tangent, 32 | gb_texc, 33 | gb_texc_deriv, 34 | view_pos, 35 | light_pos, 36 | light_power, 37 | material, 38 | min_roughness 39 | ): 40 | 41 | ################################################################################ 42 | # Texture lookups 43 | ################################################################################ 44 | 45 | kd = material['kd'].sample(gb_texc, gb_texc_deriv) 46 | ks = material['ks'].sample(gb_texc, gb_texc_deriv)[..., 0:3] # skip alpha 47 | perturbed_nrm = None 48 | if 'normal' in material: 49 | perturbed_nrm = material['normal'].sample(gb_texc, gb_texc_deriv) 50 | 51 | gb_normal = ru.prepare_shading_normal(gb_pos, view_pos, perturbed_nrm, gb_normal, gb_tangent, gb_geometric_normal, two_sided_shading=True, opengl=True) 52 | 53 | # Separate kd into alpha and color, default alpha = 1 54 | alpha = kd[..., 3:4] if kd.shape[-1] == 4 else torch.ones_like(kd[..., 0:1]) 55 | kd = kd[..., 0:3] 56 | 57 | ################################################################################ 58 | # Evaluate BSDF 59 | ################################################################################ 60 | 61 | assert 'bsdf' in material, "Material must specify a BSDF type" 62 | if material['bsdf'] == 'pbr': 63 | shaded_col = ru.pbr_bsdf(kd, ks, gb_pos, gb_normal, view_pos, light_pos, min_roughness) * light_power 64 | elif material['bsdf'] == 'diffuse': 65 | shaded_col = kd * ru.lambert(gb_normal, util.safe_normalize(light_pos - gb_pos)) * light_power 66 | elif material['bsdf'] == 'normal': 67 | shaded_col = (gb_normal + 1.0)*0.5 68 | elif material['bsdf'] == 'tangent': 69 | shaded_col = (gb_tangent + 1.0)*0.5 70 | else: 71 | assert False, "Invalid BSDF '%s'" % material['bsdf'] 72 | 73 | out = torch.cat((shaded_col, alpha), dim=-1) 74 | 75 | return out 76 | 77 | # ============================================================================================== 78 | # Render a depth slice of the mesh (scene), some limitations: 79 | # - Single mesh 80 | # - Single light 81 | # - Single material 82 | # ============================================================================================== 83 | def render_layer( 84 | rast, 85 | rast_deriv, 86 | mesh, 87 | view_pos, 88 | light_pos, 89 | 
light_power,
90 | resolution,
91 | min_roughness,
92 | spp,
93 | msaa
94 | ):
95 |
96 | full_res = resolution*spp
97 |
98 | ################################################################################
99 | # Rasterize
100 | ################################################################################
101 |
102 | # Scale down to shading resolution when MSAA is enabled, otherwise shade at full resolution
103 | if spp > 1 and msaa:
104 | rast_out_s = util.scale_img_nhwc(rast, [resolution, resolution], mag='nearest', min='nearest')
105 | rast_out_deriv_s = util.scale_img_nhwc(rast_deriv, [resolution, resolution], mag='nearest', min='nearest') * spp
106 | else:
107 | rast_out_s = rast
108 | rast_out_deriv_s = rast_deriv
109 |
110 | ################################################################################
111 | # Interpolate attributes
112 | ################################################################################
113 |
114 | # Interpolate world space position
115 | gb_pos, _ = interpolate(mesh.v_pos[None, ...], rast_out_s, mesh.t_pos_idx.int())
116 |
117 | # Compute geometric normals. We need these for the bent normals trick (for bump mapping)
118 | v0 = mesh.v_pos[mesh.t_pos_idx[:, 0], :]
119 | v1 = mesh.v_pos[mesh.t_pos_idx[:, 1], :]
120 | v2 = mesh.v_pos[mesh.t_pos_idx[:, 2], :]
121 | face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
122 | face_normal_indices = (torch.arange(0, face_normals.shape[0], dtype=torch.int64, device='cuda')[:, None]).repeat(1, 3)
123 | gb_geometric_normal, _ = interpolate(face_normals[None, ...], rast_out_s, face_normal_indices.int())
124 |
125 | # Compute tangent space
126 | assert mesh.v_nrm is not None and mesh.v_tng is not None
127 | gb_normal, _ = interpolate(mesh.v_nrm[None, ...], rast_out_s, mesh.t_nrm_idx.int())
128 | gb_tangent, _ = interpolate(mesh.v_tng[None, ...], rast_out_s, mesh.t_tng_idx.int()) # Interpolate tangents
129 |
130 | # Texture coordinates
131 | assert mesh.v_tex is not None
132 | gb_texc, gb_texc_deriv = interpolate(mesh.v_tex[None, ...], rast_out_s, mesh.t_tex_idx.int(), rast_db=rast_out_deriv_s)
133 |
134 | ################################################################################
135 | # Shade
136 | ################################################################################
137 |
138 | color = shade(gb_pos, gb_geometric_normal, gb_normal, gb_tangent, gb_texc, gb_texc_deriv,
139 | view_pos, light_pos, light_power, mesh.material, min_roughness)
140 |
141 | ################################################################################
142 | # Prepare output
143 | ################################################################################
144 |
145 | # Scale back up to visibility resolution if using MSAA
146 | if spp > 1 and msaa:
147 | color = util.scale_img_nhwc(color, [full_res, full_res], mag='nearest', min='nearest')
148 |
149 | # Return color & raster output for peeling
150 | return color
151 |
152 |
153 | # ==============================================================================================
154 | # Render a depth peeled mesh (scene), some limitations:
155 | # - Single mesh
156 | # - Single light
157 | # - Single material
158 | # ==============================================================================================
159 | def render_mesh(
160 | ctx,
161 | mesh,
162 | mtx_in,
163 | view_pos,
164 | light_pos,
165 | light_power,
166 | resolution,
167 | spp = 1,
168 | num_layers = 1,
169 | msaa = False,
170 | background = None,
171 | antialias = True,
172 |
min_roughness = 0.08, 173 | return_rast_map = False, 174 | ): 175 | assert not (return_rast_map and num_layers > 1) 176 | 177 | def prepare_input_vector(x): 178 | x = torch.tensor(x, dtype=torch.float32, device='cuda') if not torch.is_tensor(x) else x 179 | return x[:, None, None, :] if len(x.shape) == 2 else x 180 | 181 | full_res = resolution*spp 182 | 183 | # Convert numpy arrays to torch tensors 184 | mtx_in = torch.tensor(mtx_in, dtype=torch.float32, device='cuda') if not torch.is_tensor(mtx_in) else mtx_in 185 | light_pos = prepare_input_vector(light_pos) 186 | light_power = prepare_input_vector(light_power) 187 | view_pos = prepare_input_vector(view_pos) 188 | 189 | # clip space transform 190 | v_pos_clip = ru.xfm_points(mesh.v_pos[None, ...], mtx_in) 191 | 192 | # Render all layers front-to-back 193 | layers = [] 194 | with dr.DepthPeeler(ctx, v_pos_clip, mesh.t_pos_idx.int(), [resolution*spp, resolution*spp]) as peeler: 195 | for _ in range(num_layers): 196 | rast, db = peeler.rasterize_next_layer() 197 | layers += [(render_layer(rast, db, mesh, view_pos, light_pos, light_power, resolution, min_roughness, spp, msaa), rast)] 198 | 199 | if return_rast_map: 200 | return rast.detach() 201 | 202 | # Clear to background layer 203 | if background is not None: 204 | assert background.shape[1] == resolution and background.shape[2] == resolution 205 | if spp > 1: 206 | background = util.scale_img_nhwc(background, [full_res, full_res], mag='nearest', min='nearest') 207 | accum_col = background 208 | else: 209 | accum_col = torch.zeros(size=(1, full_res, full_res, 3), dtype=torch.float32, device='cuda') 210 | 211 | # Composite BACK-TO-FRONT 212 | for color, rast in reversed(layers): 213 | alpha = (rast[..., -1:] > 0) * color[..., 3:4] 214 | accum_col = torch.lerp(accum_col, color[..., 0:3], alpha) 215 | if antialias: 216 | accum_col = dr.antialias(accum_col.contiguous(), rast, v_pos_clip, mesh.t_pos_idx.int()) # TODO: need to support bfloat16 217 | 218 | # Downscale to framebuffer resolution. Use avg pooling 219 | out = util.avg_pool_nhwc(accum_col, spp) if spp > 1 else accum_col 220 | 221 | return out 222 | 223 | 224 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .ops import xfm_points, xfm_vectors, image_loss, prepare_shading_normal, lambert, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith 10 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "prepare_shading_normal", "lambert", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ] 11 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import math 10 | import torch 11 | 12 | NORMAL_THRESHOLD = 0.1 13 | 14 | ################################################################################ 15 | # Vector utility functions 16 | ################################################################################ 17 | 18 | def _dot(x, y): 19 | return torch.sum(x*y, -1, keepdim=True) 20 | 21 | def _reflect(x, n): 22 | return 2*_dot(x, n)*n - x 23 | 24 | def _safe_normalize(x): 25 | return torch.nn.functional.normalize(x, dim = -1) 26 | 27 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading): 28 | # Swap normal direction for backfacing surfaces 29 | if two_sided_shading: 30 | smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm) 31 | geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm) 32 | 33 | t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1) 34 | return torch.lerp(geom_nrm, smooth_nrm, t) 35 | 36 | 37 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl): 38 | smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm)) 39 | if opengl: 40 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 41 | else: 42 | shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0) 43 | return _safe_normalize(shading_nrm) 44 | 45 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 46 | smooth_nrm = _safe_normalize(smooth_nrm) 47 | smooth_tng = _safe_normalize(smooth_tng) 48 | view_vec = _safe_normalize(view_pos - pos) 49 | shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl) 50 | return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading) 51 | 52 | ################################################################################ 53 | # Simple lambertian diffuse BSDF 54 | ################################################################################ 55 | 56 | def bsdf_lambert(nrm, wi): 57 | return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi 58 | 59 | ################################################################################ 60 | # Phong specular, loosely based on mitsuba implementation 61 | ################################################################################ 62 | 63 | def bsdf_phong(nrm, wo, wi, N): 64 | dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0) 65 | dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0) 66 | return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi) 67 | 68 | ################################################################################ 69 | # PBR's implementation of GGX specular 70 | ################################################################################ 71 | 72 | specular_epsilon = 1e-4 73 | 74 | def bsdf_fresnel_shlick(f0, f90, cosTheta): 75 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 76 | return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0 77 | 
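# The GGX (Trowbridge-Reitz) normal distribution below evaluates
# D(h) = alphaSqr / (pi * (cosTheta^2 * (alphaSqr - 1) + 1)^2),
# with cosTheta clamped away from 0 and 1 for numerical stability.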
78 | def bsdf_ndf_ggx(alphaSqr, cosTheta): 79 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 80 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1 81 | return alphaSqr / (d * d * math.pi) 82 | 83 | def bsdf_lambda_ggx(alphaSqr, cosTheta): 84 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 85 | cosThetaSqr = _cosTheta * _cosTheta 86 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr 87 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0) 88 | return res 89 | 90 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO): 91 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI) 92 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO) 93 | return 1 / (1 + lambdaI + lambdaO) 94 | 95 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08): 96 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0) 97 | alphaSqr = _alpha * _alpha 98 | 99 | h = _safe_normalize(wo + wi) 100 | woDotN = _dot(wo, nrm) 101 | wiDotN = _dot(wi, nrm) 102 | woDotH = _dot(wo, h) 103 | nDotH = _dot(nrm, h) 104 | 105 | D = bsdf_ndf_ggx(alphaSqr, nDotH) 106 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN) 107 | F = bsdf_fresnel_shlick(col, 1, woDotH) 108 | 109 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon) 110 | 111 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon) 112 | return torch.where(frontfacing, w, torch.zeros_like(w)) 113 | 114 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08): 115 | wo = _safe_normalize(view_pos - pos) 116 | wi = _safe_normalize(light_pos - pos) 117 | 118 | spec_str = arm[..., 0:1] # x component 119 | roughness = arm[..., 1:2] # y component 120 | metallic = arm[..., 2:3] # z component 121 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str) 122 | kd = kd * (1.0 - metallic) 123 | 124 | diffuse = kd * bsdf_lambert(nrm, wi) 125 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness) 126 | return diffuse + specular 127 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/bsdf.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
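// Parameter blocks for the BSDF CUDA kernels; each struct bundles the
// input/output tensors and launch size for one forward/backward op (see
// bsdf.cu and torch_bindings.cpp).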
8 |
9 | #pragma once
10 |
11 | #include "common.h"
12 |
13 | struct LambertKernelParams
14 | {
15 | Tensor nrm;
16 | Tensor wi;
17 | Tensor out;
18 | dim3 gridSize;
19 | };
20 |
21 | struct FresnelShlickKernelParams
22 | {
23 | Tensor f0;
24 | Tensor f90;
25 | Tensor cosTheta;
26 | Tensor out;
27 | dim3 gridSize;
28 | };
29 |
30 | struct NdfGGXParams
31 | {
32 | Tensor alphaSqr;
33 | Tensor cosTheta;
34 | Tensor out;
35 | dim3 gridSize;
36 | };
37 |
38 | struct MaskingSmithParams
39 | {
40 | Tensor alphaSqr;
41 | Tensor cosThetaI;
42 | Tensor cosThetaO;
43 | Tensor out;
44 | dim3 gridSize;
45 | };
46 |
47 | struct PbrSpecular
48 | {
49 | Tensor col;
50 | Tensor nrm;
51 | Tensor wo;
52 | Tensor wi;
53 | Tensor alpha;
54 | Tensor out;
55 | dim3 gridSize;
56 | float min_roughness;
57 | };
58 |
59 | struct PbrBSDF
60 | {
61 | Tensor kd;
62 | Tensor arm;
63 | Tensor pos;
64 | Tensor nrm;
65 | Tensor view_pos;
66 | Tensor light_pos;
67 | Tensor out;
68 | dim3 gridSize;
69 | float min_roughness;
70 | };
71 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/common.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include <cuda_runtime.h>
10 | #include <algorithm>
11 |
12 | //------------------------------------------------------------------------
13 | // Block and grid size calculators for kernel launches.
14 |
15 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims)
16 | {
17 | int maxThreads = maxWidth * maxHeight;
18 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1)
19 | return dim3(1, 1, 1); // Degenerate.
20 |
21 | // Start from max size.
22 | int bw = maxWidth;
23 | int bh = maxHeight;
24 |
25 | // Optimizations for weirdly sized buffers.
26 | if (dims.x < bw)
27 | {
28 | // Decrease block width to smallest power of two that covers the buffer width.
29 | while ((bw >> 1) >= dims.x)
30 | bw >>= 1;
31 |
32 | // Maximize height.
33 | bh = maxThreads / bw;
34 | if (bh > dims.y)
35 | bh = dims.y;
36 | }
37 | else if (dims.y < bh)
38 | {
39 | // Halve height and double width until fits completely inside buffer vertically.
40 | while (bh > dims.y)
41 | {
42 | bh >>= 1;
43 | if (bw < dims.x)
44 | bw <<= 1;
45 | }
46 | }
47 |
48 | // Done.
49 | return dim3(bw, bh, 1);
50 | }
51 |
52 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g.
__shfl_xor_sync)
53 | dim3 getWarpSize(dim3 blockSize)
54 | {
55 | return dim3(
56 | std::min(blockSize.x, 32u),
57 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)),
58 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z))
59 | );
60 | }
61 |
62 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims)
63 | {
64 | dim3 gridSize;
65 | gridSize.x = (dims.x - 1) / blockSize.x + 1;
66 | gridSize.y = (dims.y - 1) / blockSize.y + 1;
67 | gridSize.z = (dims.z - 1) / blockSize.z + 1;
68 | return gridSize;
69 | }
70 |
71 | //------------------------------------------------------------------------
72 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/common.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #include <cuda.h>
11 | #include <stdint.h>
12 |
13 | #include "vec3f.h"
14 | #include "vec4f.h"
15 | #include "tensor.h"
16 |
17 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims);
18 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims);
19 |
20 | #ifdef __CUDACC__
21 |
22 | #ifdef _MSC_VER
23 | #define M_PI 3.14159265358979323846f
24 | #endif
25 |
26 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
27 | {
28 | return dim3(
29 | min(blockSize.x, 32u),
30 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)),
31 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z))
32 | );
33 | }
34 |
35 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); }
36 | #else
37 | dim3 getWarpSize(dim3 blockSize);
38 | #endif
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/loss.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #include <cuda.h>
10 |
11 | #include "common.h"
12 | #include "loss.h"
13 |
14 | //------------------------------------------------------------------------
15 | // Utils
16 |
17 | __device__ inline float bwdAbs(float x) { return x == 0.0f ? 0.0f : x < 0.0f ? -1.0f : 1.0f; }
18 |
19 | __device__ float warpSum(float val) {
20 | for (int i = 1; i < 32; i *= 2)
21 | val += __shfl_xor_sync(0xFFFFFFFF, val, i);
22 | return val;
23 | }
24 |
25 | //------------------------------------------------------------------------
26 | // Tonemapping
27 |
28 | __device__ inline float fwdSRGB(float x)
29 | {
30 | return x > 0.0031308f ?
powf(max(x, 0.0031308f), 1.0f / 2.4f) * 1.055f - 0.055f : 12.92f * max(x, 0.0f); 31 | } 32 | 33 | __device__ inline void bwdSRGB(float x, float &d_x, float d_out) 34 | { 35 | if (x > 0.0031308f) 36 | d_x += d_out * 0.439583f / powf(x, 0.583333f); 37 | else if (x > 0.0f) 38 | d_x += d_out * 12.92f; 39 | } 40 | 41 | __device__ inline vec3f fwdTonemapLogSRGB(vec3f x) 42 | { 43 | return vec3f(fwdSRGB(logf(x.x + 1.0f)), fwdSRGB(logf(x.y + 1.0f)), fwdSRGB(logf(x.z + 1.0f))); 44 | } 45 | 46 | __device__ inline void bwdTonemapLogSRGB(vec3f x, vec3f& d_x, vec3f d_out) 47 | { 48 | if (x.x > 0.0f && x.x < 65535.0f) 49 | { 50 | bwdSRGB(logf(x.x + 1.0f), d_x.x, d_out.x); 51 | d_x.x *= 1 / (x.x + 1.0f); 52 | } 53 | if (x.y > 0.0f && x.y < 65535.0f) 54 | { 55 | bwdSRGB(logf(x.y + 1.0f), d_x.y, d_out.y); 56 | d_x.y *= 1 / (x.y + 1.0f); 57 | } 58 | if (x.z > 0.0f && x.z < 65535.0f) 59 | { 60 | bwdSRGB(logf(x.z + 1.0f), d_x.z, d_out.z); 61 | d_x.z *= 1 / (x.z + 1.0f); 62 | } 63 | } 64 | 65 | __device__ inline float fwdRELMSE(float img, float target, float eps = 0.1f) 66 | { 67 | return (img - target) * (img - target) / (img * img + target * target + eps); 68 | } 69 | 70 | __device__ inline void bwdRELMSE(float img, float target, float &d_img, float &d_target, float d_out, float eps = 0.1f) 71 | { 72 | float denom = (target * target + img * img + eps); 73 | d_img += d_out * 2 * (img - target) * (target * (target + img) + eps) / (denom * denom); 74 | d_target -= d_out * 2 * (img - target) * (img * (target + img) + eps) / (denom * denom); 75 | } 76 | 77 | __device__ inline float fwdSMAPE(float img, float target, float eps=0.01f) 78 | { 79 | return abs(img - target) / (img + target + eps); 80 | } 81 | 82 | __device__ inline void bwdSMAPE(float img, float target, float& d_img, float& d_target, float d_out, float eps = 0.01f) 83 | { 84 | float denom = (target + img + eps); 85 | d_img += d_out * bwdAbs(img - target) * (2 * target + eps) / (denom * denom); 86 | d_target -= d_out * bwdAbs(img - target) * (2 * img + eps) / (denom * denom); 87 | } 88 | 89 | //------------------------------------------------------------------------ 90 | // Kernels 91 | 92 | __global__ void imgLossFwdKernel(LossKernelParams p) 93 | { 94 | // Calculate pixel position. 
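// One thread evaluates one pixel below; the per-pixel losses are then
// reduced with warpSum so that a single lane per warp writes the partial
// sum to the warp-downsampled output tensor.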
95 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
96 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
97 | unsigned int pz = blockIdx.z;
98 |
99 | float floss = 0.0f;
100 | if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z)
101 | {
102 | vec3f img = p.img.fetch3(px, py, pz);
103 | vec3f target = p.target.fetch3(px, py, pz);
104 |
105 | img = vec3f(clamp(img.x, 0.0f, 65535.0f), clamp(img.y, 0.0f, 65535.0f), clamp(img.z, 0.0f, 65535.0f));
106 | target = vec3f(clamp(target.x, 0.0f, 65535.0f), clamp(target.y, 0.0f, 65535.0f), clamp(target.z, 0.0f, 65535.0f));
107 |
108 | if (p.tonemapper == TONEMAPPER_LOG_SRGB)
109 | {
110 | img = fwdTonemapLogSRGB(img);
111 | target = fwdTonemapLogSRGB(target);
112 | }
113 |
114 | vec3f vloss(0);
115 | if (p.loss == LOSS_MSE)
116 | vloss = (img - target) * (img - target);
117 | else if (p.loss == LOSS_RELMSE)
118 | vloss = vec3f(fwdRELMSE(img.x, target.x), fwdRELMSE(img.y, target.y), fwdRELMSE(img.z, target.z));
119 | else if (p.loss == LOSS_SMAPE)
120 | vloss = vec3f(fwdSMAPE(img.x, target.x), fwdSMAPE(img.y, target.y), fwdSMAPE(img.z, target.z));
121 | else
122 | vloss = vec3f(abs(img.x - target.x), abs(img.y - target.y), abs(img.z - target.z));
123 |
124 | floss = sum(vloss) / 3.0f;
125 | }
126 |
127 | floss = warpSum(floss);
128 |
129 | dim3 warpSize = getWarpSize(blockDim);
130 | if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z && threadIdx.x % warpSize.x == 0 && threadIdx.y % warpSize.y == 0 && threadIdx.z % warpSize.z == 0)
131 | p.out.store(px / warpSize.x, py / warpSize.y, pz / warpSize.z, floss);
132 | }
133 |
134 | __global__ void imgLossBwdKernel(LossKernelParams p)
135 | {
136 | // Calculate pixel position.
137 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
138 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
139 | unsigned int pz = blockIdx.z;
140 |
141 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
142 | return;
143 |
144 | dim3 warpSize = getWarpSize(blockDim);
145 |
146 | vec3f _img = p.img.fetch3(px, py, pz);
147 | vec3f _target = p.target.fetch3(px, py, pz);
148 | float d_out = p.out.fetch1(px / warpSize.x, py / warpSize.y, pz / warpSize.z);
149 |
150 | /////////////////////////////////////////////////////////////////////
151 | // FWD
152 |
153 | vec3f img = _img, target = _target;
154 | if (p.tonemapper == TONEMAPPER_LOG_SRGB)
155 | {
156 | img = fwdTonemapLogSRGB(img);
157 | target = fwdTonemapLogSRGB(target);
158 | }
159 |
160 | /////////////////////////////////////////////////////////////////////
161 | // BWD
162 |
163 | vec3f d_vloss = vec3f(d_out, d_out, d_out) / 3.0f;
164 |
165 | vec3f d_img(0), d_target(0);
166 | if (p.loss == LOSS_MSE)
167 | {
168 | d_img = vec3f(d_vloss.x * 2 * (img.x - target.x), d_vloss.y * 2 * (img.y - target.y), d_vloss.z * 2 * (img.z - target.z));
169 | d_target = -d_img;
170 | }
171 | else if (p.loss == LOSS_RELMSE)
172 | {
173 | bwdRELMSE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
174 | bwdRELMSE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
175 | bwdRELMSE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
176 | }
177 | else if (p.loss == LOSS_SMAPE)
178 | {
179 | bwdSMAPE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
180 | bwdSMAPE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
181 | bwdSMAPE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
182 | }
183 | else
184 | {
185 | d_img = d_vloss * vec3f(bwdAbs(img.x - target.x), bwdAbs(img.y - target.y), bwdAbs(img.z - target.z));
186 | d_target =
-d_img; 187 | } 188 | 189 | 190 | if (p.tonemapper == TONEMAPPER_LOG_SRGB) 191 | { 192 | vec3f d__img(0), d__target(0); 193 | bwdTonemapLogSRGB(_img, d__img, d_img); 194 | bwdTonemapLogSRGB(_target, d__target, d_target); 195 | d_img = d__img; d_target = d__target; 196 | } 197 | 198 | if (_img.x <= 0.0f || _img.x >= 65535.0f) d_img.x = 0; 199 | if (_img.y <= 0.0f || _img.y >= 65535.0f) d_img.y = 0; 200 | if (_img.z <= 0.0f || _img.z >= 65535.0f) d_img.z = 0; 201 | if (_target.x <= 0.0f || _target.x >= 65535.0f) d_target.x = 0; 202 | if (_target.y <= 0.0f || _target.y >= 65535.0f) d_target.y = 0; 203 | if (_target.z <= 0.0f || _target.z >= 65535.0f) d_target.z = 0; 204 | 205 | p.img.store_grad(px, py, pz, d_img); 206 | p.target.store_grad(px, py, pz, d_target); 207 | } -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/loss.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include "common.h" 12 | 13 | enum TonemapperType 14 | { 15 | TONEMAPPER_NONE = 0, 16 | TONEMAPPER_LOG_SRGB = 1 17 | }; 18 | 19 | enum LossType 20 | { 21 | LOSS_L1 = 0, 22 | LOSS_MSE = 1, 23 | LOSS_RELMSE = 2, 24 | LOSS_SMAPE = 3 25 | }; 26 | 27 | struct LossKernelParams 28 | { 29 | Tensor img; 30 | Tensor target; 31 | Tensor out; 32 | dim3 gridSize; 33 | TonemapperType tonemapper; 34 | LossType loss; 35 | }; 36 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/mesh.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
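// Batched 4x4 transform kernels: xfmPointsFwdKernel stages the matrix in
// shared memory once per block and transforms one point per thread, while
// xfmPointsBwdKernel applies the transposed matrix to the output gradients.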
8 |
9 | #include <cuda.h>
10 |
11 | #include "common.h"
12 | #include "mesh.h"
13 |
14 |
15 | //------------------------------------------------------------------------
16 | // Kernels
17 |
18 | __global__ void xfmPointsFwdKernel(XfmKernelParams p)
19 | {
20 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
21 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
22 |
23 | __shared__ float mtx[4][4];
24 | if (threadIdx.x < 16)
25 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
26 | __syncthreads();
27 |
28 | if (px >= p.gridSize.x)
29 | return;
30 |
31 | vec3f pos(
32 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
33 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
34 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
35 | );
36 |
37 | if (p.isPoints)
38 | {
39 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]);
40 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]);
41 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]);
42 | p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]);
43 | }
44 | else
45 | {
46 | p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]);
47 | p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]);
48 | p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]);
49 | }
50 | }
51 |
52 | __global__ void xfmPointsBwdKernel(XfmKernelParams p)
53 | {
54 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
55 | unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
56 |
57 | __shared__ float mtx[4][4];
58 | if (threadIdx.x < 16)
59 | mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
60 | __syncthreads();
61 |
62 | if (px >= p.gridSize.x)
63 | return;
64 |
65 | vec3f pos(
66 | p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
67 | p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
68 | p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
69 | );
70 |
71 | vec4f d_out(
72 | p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
73 | p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
74 | p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
75 | p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0))
76 | );
77 |
78 | if (p.isPoints)
79 | {
80 | p.points.store_grad(p.points._nhwcIndex(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
81 | p.points.store_grad(p.points._nhwcIndex(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
82 | p.points.store_grad(p.points._nhwcIndex(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
83 | }
84 | else
85 | {
86 | p.points.store_grad(p.points._nhwcIndex(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
87 | p.points.store_grad(p.points._nhwcIndex(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
88 | p.points.store_grad(p.points._nhwcIndex(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
89 | }
90 | }
--------------------------------------------------------------------------------
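For reference, here is a minimal PyTorch sketch of what the forward transform above computes (an illustration only; xfm_points_reference is a hypothetical name, and the real op exposed through renderutils also provides the fused backward pass):

import torch

def xfm_points_reference(points, matrix):
    # points: [N, V, 3], matrix: [N, 4, 4] -> homogeneous output [N, V, 4].
    # Pads each point with w = 1 and computes out = M @ [p; 1] per point,
    # matching the row/column order used by xfmPointsFwdKernel above.
    points_h = torch.nn.functional.pad(points, (0, 1), value=1.0)
    return torch.matmul(points_h, matrix.transpose(1, 2))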
/nvdiffmodeling/src/renderutils/c_src/mesh.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | #include "common.h" 12 | 13 | struct XfmKernelParams 14 | { 15 | bool isPoints; 16 | Tensor points; 17 | Tensor matrix; 18 | Tensor out; 19 | dim3 gridSize; 20 | }; 21 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/normal.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "common.h" 10 | #include "normal.h" 11 | 12 | #define NORMAL_THRESHOLD 0.1f 13 | 14 | //------------------------------------------------------------------------ 15 | // Perturb shading normal by tangent frame 16 | 17 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl) 18 | { 19 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 20 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 21 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 22 | return safeNormalize(_shading_nrm); 23 | } 24 | 25 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl) 26 | { 27 | //////////////////////////////////////////////////////////////////////// 28 | // FWD 29 | vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm); 30 | vec3f smooth_bitng = safeNormalize(_smooth_bitng); 31 | vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f); 32 | 33 | //////////////////////////////////////////////////////////////////////// 34 | // BWD 35 | vec3f d_shading_nrm(0); 36 | bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out); 37 | 38 | vec3f d_smooth_bitng(0); 39 | 40 | if (perturbed_nrm.z > 0.0f) 41 | { 42 | d_smooth_nrm += d_shading_nrm * perturbed_nrm.z; 43 | d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm); 44 | } 45 | 46 | d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y; 47 | d_perturbed_nrm.y += (opengl ? 
-1 : 1) * sum(d_shading_nrm * smooth_bitng); 48 | 49 | d_smooth_tng += d_shading_nrm * perturbed_nrm.x; 50 | d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng); 51 | 52 | vec3f d__smooth_bitng(0); 53 | bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng); 54 | 55 | bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng); 56 | } 57 | 58 | //------------------------------------------------------------------------ 59 | #define bent_nrm_eps 0.001f 60 | 61 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm) 62 | { 63 | float dp = dot(view_vec, smooth_nrm); 64 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 65 | return geom_nrm * (1.0f - t) + smooth_nrm * t; 66 | } 67 | 68 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out) 69 | { 70 | //////////////////////////////////////////////////////////////////////// 71 | // FWD 72 | float dp = dot(view_vec, smooth_nrm); 73 | float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f); 74 | 75 | //////////////////////////////////////////////////////////////////////// 76 | // BWD 77 | if (dp > NORMAL_THRESHOLD) 78 | d_smooth_nrm += d_out; 79 | else 80 | { 81 | // geom_nrm * (1.0f - t) + smooth_nrm * t; 82 | d_geom_nrm += d_out * (1.0f - t); 83 | d_smooth_nrm += d_out * t; 84 | float d_t = sum(d_out * (smooth_nrm - geom_nrm)); 85 | 86 | float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD; 87 | 88 | bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp); 89 | } 90 | } 91 | 92 | //------------------------------------------------------------------------ 93 | // Kernels 94 | 95 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p) 96 | { 97 | // Calculate pixel position. 98 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 99 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 100 | unsigned int pz = blockIdx.z; 101 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 102 | return; 103 | 104 | vec3f pos = p.pos.fetch3(px, py, pz); 105 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 106 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 107 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 108 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 109 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 110 | 111 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 112 | vec3f smooth_tng = safeNormalize(_smooth_tng); 113 | vec3f view_vec = safeNormalize(view_pos - pos); 114 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 115 | 116 | vec3f res; 117 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 118 | res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm); 119 | else 120 | res = fwdBendNormal(view_vec, shading_nrm, geom_nrm); 121 | 122 | p.out.store(px, py, pz, res); 123 | } 124 | 125 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p) 126 | { 127 | // Calculate pixel position. 
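// The backward kernel first re-runs the forward computations it needs,
// then propagates d_out through the bend / perturb / normalize chain in
// reverse order, accumulating into the per-input gradient tensors.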
128 | unsigned int px = blockIdx.x * blockDim.x + threadIdx.x; 129 | unsigned int py = blockIdx.y * blockDim.y + threadIdx.y; 130 | unsigned int pz = blockIdx.z; 131 | if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z) 132 | return; 133 | 134 | vec3f pos = p.pos.fetch3(px, py, pz); 135 | vec3f view_pos = p.view_pos.fetch3(px, py, pz); 136 | vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz); 137 | vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz); 138 | vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz); 139 | vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz); 140 | vec3f d_out = p.out.fetch3(px, py, pz); 141 | 142 | /////////////////////////////////////////////////////////////////////////////////////////////////// 143 | // FWD 144 | 145 | vec3f smooth_nrm = safeNormalize(_smooth_nrm); 146 | vec3f smooth_tng = safeNormalize(_smooth_tng); 147 | vec3f _view_vec = view_pos - pos; 148 | vec3f view_vec = safeNormalize(view_pos - pos); 149 | 150 | vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl); 151 | 152 | /////////////////////////////////////////////////////////////////////////////////////////////////// 153 | // BWD 154 | 155 | vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0); 156 | if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f) 157 | { 158 | bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 159 | d_shading_nrm = -d_shading_nrm; 160 | d_geom_nrm = -d_geom_nrm; 161 | } 162 | else 163 | bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out); 164 | 165 | vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0); 166 | bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl); 167 | 168 | vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0); 169 | bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec); 170 | bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm); 171 | bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng); 172 | 173 | p.pos.store_grad(px, py, pz, -d__view_vec); 174 | p.view_pos.store_grad(px, py, pz, d__view_vec); 175 | p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm); 176 | p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm); 177 | p.smooth_tng.store_grad(px, py, pz, d__smooth_tng); 178 | p.geom_nrm.store_grad(px, py, pz, d_geom_nrm); 179 | } -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/normal.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 
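// Parameter block for the PrepareShadingNormal forward/backward kernels
// defined in normal.cu.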
8 |
9 | #pragma once
10 |
11 | #include "common.h"
12 |
13 | struct PrepareShadingNormalKernelParams
14 | {
15 | Tensor pos;
16 | Tensor view_pos;
17 | Tensor perturbed_nrm;
18 | Tensor smooth_nrm;
19 | Tensor smooth_tng;
20 | Tensor geom_nrm;
21 | Tensor out;
22 | dim3 gridSize;
23 | bool two_sided_shading, opengl;
24 | };
25 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/tensor.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #pragma once
10 | #if defined(__CUDACC__) && defined(BFLOAT16)
11 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits
12 | #endif
13 |
14 | //---------------------------------------------------------------------------------
15 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16
16 |
17 | struct Tensor
18 | {
19 | void* val;
20 | void* d_val;
21 | int dims[4];
22 | int strides[4];
23 | bool fp16;
24 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {}
25 |
26 | #ifdef __CUDACC__
27 | // Helpers to index and read/write a single element
28 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; }
29 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); }
30 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * dims[1] + h) * dims[2] + w) * dims[3] + c; }
31 | #ifdef BFLOAT16
32 | __device__ inline float fetch(unsigned int idx) const { return fp16 ?
__bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; }
33 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; }
34 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; }
35 | #else
36 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; }
37 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; }
38 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; }
39 | #endif
40 |
41 | //////////////////////////////////////////////////////////////////////////////////////////
42 | // Fetch, use broadcasting for tensor dimensions of size 1
43 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const
44 | {
45 | return fetch(nhwcIndex(z, y, x, 0));
46 | }
47 |
48 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const
49 | {
50 | return vec3f(
51 | fetch(nhwcIndex(z, y, x, 0)),
52 | fetch(nhwcIndex(z, y, x, 1)),
53 | fetch(nhwcIndex(z, y, x, 2))
54 | );
55 | }
56 |
57 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
58 | // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
59 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val)
60 | {
61 | store(_nhwcIndex(z, y, x, 0), _val);
62 | }
63 |
64 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
65 | {
66 | store(_nhwcIndex(z, y, x, 0), _val.x);
67 | store(_nhwcIndex(z, y, x, 1), _val.y);
68 | store(_nhwcIndex(z, y, x, 2), _val.z);
69 | }
70 |
71 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////
72 | // Store gradient, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
73 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val)
74 | {
75 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val);
76 | }
77 |
78 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
79 | {
80 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x);
81 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y);
82 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z);
83 | }
84 | #endif
85 |
86 | };
87 |
--------------------------------------------------------------------------------
/nvdiffmodeling/src/renderutils/c_src/vec3f.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
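// Minimal device-side 3-vector type with forward helpers (dot, cross,
// reflect, safeNormalize) and matching bwd* functions that accumulate
// gradients for the backward passes.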
8 | 9 | #pragma once 10 | 11 | struct vec3f 12 | { 13 | float x, y, z; 14 | 15 | #ifdef __CUDACC__ 16 | __device__ vec3f() { } 17 | __device__ vec3f(float v) { x = v; y = v; z = v; } 18 | __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 19 | __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; } 20 | 21 | __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; } 22 | __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; } 23 | __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; } 24 | __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; } 25 | #endif 26 | }; 27 | 28 | #ifdef __CUDACC__ 29 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } 30 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } 31 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } 32 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); } 33 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); } 34 | 35 | __device__ static inline float sum(vec3f a) 36 | { 37 | return a.x + a.y + a.z; 38 | } 39 | 40 | __device__ static inline vec3f cross(vec3f a, vec3f b) 41 | { 42 | vec3f out; 43 | out.x = a.y * b.z - a.z * b.y; 44 | out.y = a.z * b.x - a.x * b.z; 45 | out.z = a.x * b.y - a.y * b.x; 46 | return out; 47 | } 48 | 49 | __device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out) 50 | { 51 | d_a.x += d_out.z * b.y - d_out.y * b.z; 52 | d_a.y += d_out.x * b.z - d_out.z * b.x; 53 | d_a.z += d_out.y * b.x - d_out.x * b.y; 54 | 55 | d_b.x += d_out.y * a.z - d_out.z * a.y; 56 | d_b.y += d_out.z * a.x - d_out.x * a.z; 57 | d_b.z += d_out.x * a.y - d_out.y * a.x; 58 | } 59 | 60 | __device__ static inline float dot(vec3f a, vec3f b) 61 | { 62 | return a.x * b.x + a.y * b.y + a.z * b.z; 63 | } 64 | 65 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out) 66 | { 67 | d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z; 68 | d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z; 69 | } 70 | 71 | __device__ static inline vec3f reflect(vec3f x, vec3f n) 72 | { 73 | return n * 2.0f * dot(n, x) - x; 74 | } 75 | 76 | __device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out) 77 | { 78 | d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z); 79 | d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z); 80 | d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1); 81 | 82 | d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x); 83 | d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y); 84 | d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z)); 85 | } 86 | 87 | __device__ static inline vec3f safeNormalize(vec3f v) 88 | { 89 | float l = sqrtf(v.x * v.x + v.y * v.y + 
v.z * v.z); 90 | return l > 0.0f ? (v / l) : vec3f(0.0f); 91 | } 92 | 93 | __device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out) 94 | { 95 | 96 | float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 97 | if (l > 0.0f) 98 | { 99 | float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f); 100 | d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac; 101 | d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac; 102 | d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac; 103 | } 104 | } 105 | 106 | #endif -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/c_src/vec4f.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #pragma once 10 | 11 | struct vec4f 12 | { 13 | float x, y, z, w; 14 | 15 | #ifdef __CUDACC__ 16 | __device__ vec4f() { } 17 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; } 18 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } 19 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; } 20 | #endif 21 | }; 22 | 23 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
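For reference, the two relative losses defined below compute SMAPE(I, T) = mean(|I − T| / (|I| + |T| + ε)) and RELMSE(I, T) = mean((I − T)² / (I² + T² + ε)), with ε taken from each function's eps argument; normalizing by the local signal magnitude keeps bright HDR pixels from dominating the mean.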
8 | 9 | import torch 10 | 11 | #---------------------------------------------------------------------------- 12 | # HDR image losses 13 | #---------------------------------------------------------------------------- 14 | 15 | def _tonemap_srgb(f): 16 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 17 | 18 | def _SMAPE(img, target, eps=0.01): 19 | nom = torch.abs(img - target) 20 | denom = torch.abs(img) + torch.abs(target) + eps 21 | return torch.mean(nom / denom) 22 | 23 | def _RELMSE(img, target, eps=0.1): 24 | nom = (img - target) * (img - target) 25 | denom = img * img + target * target + eps 26 | return torch.mean(nom / denom) 27 | 28 | def image_loss_fn(img, target, loss, tonemapper): 29 | if tonemapper == 'log_srgb': 30 | img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1)) 31 | target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1)) 32 | 33 | if loss == 'mse': 34 | return torch.nn.functional.mse_loss(img, target) 35 | elif loss == 'smape': 36 | return _SMAPE(img, target) 37 | elif loss == 'relmse': 38 | return _RELMSE(img, target) 39 | else: 40 | return torch.nn.functional.l1_loss(img, target) 41 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import os 11 | import sys 12 | import torch 13 | import torch.utils.cpp_extension 14 | 15 | from .bsdf import * 16 | from .loss import * 17 | 18 | #---------------------------------------------------------------------------- 19 | # C++/Cuda plugin compiler/loader. 20 | 21 | _plugin = None 22 | if _plugin is None: 23 | 24 | # Make sure we can find the necessary compiler and library binaries. 25 | if os.name == 'nt': 26 | def find_cl_path(): 27 | import glob 28 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']: 29 | paths = sorted(glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition), reverse=True) 30 | if paths: 31 | return paths[0] 32 | 33 | # If cl.exe is not on path, try to find it. 34 | if os.system("where cl.exe >nul 2>nul") != 0: 35 | cl_path = find_cl_path() 36 | if cl_path is None: 37 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 38 | os.environ['PATH'] += ';' + cl_path 39 | 40 | # Linker options. 41 | if os.name == 'posix': 42 | ldflags = ['-lcuda'] 43 | elif os.name == 'nt': 44 | ldflags = ['/DEFAULTLIB:cuda'] 45 | 46 | # List of sources. 47 | source_files = [ 48 | 'c_src/mesh.cu', 49 | 'c_src/loss.cu', 50 | 'c_src/bsdf.cu', 51 | 'c_src/normal.cu', 52 | 'c_src/common.cpp', 53 | 'c_src/torch_bindings.cpp' 54 | ] 55 | 56 | # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine. 57 | os.environ['TORCH_CUDA_ARCH_LIST'] = '' 58 | 59 | # Compile and load.
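# torch.utils.cpp_extension.load() below JIT-compiles the listed C++/CUDA
# sources into an extension module the first time it runs (subsequent runs
# reuse the cached build) and registers it in sys.modules, which is what
# lets the plain `import renderutils_plugin` afterwards succeed.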
60 | source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files] 61 | torch.utils.cpp_extension.load(name='renderutils_plugin', sources=source_paths, extra_ldflags=ldflags, with_cuda=True, verbose=True) 62 | 63 | # Import, cache, and return the compiled module. 64 | import renderutils_plugin 65 | _plugin = renderutils_plugin 66 | 67 | #---------------------------------------------------------------------------- 68 | # Internal kernels, just used for testing functionality 69 | 70 | class _fresnel_shlick_func(torch.autograd.Function): 71 | @staticmethod 72 | def forward(ctx, f0, f90, cosTheta): 73 | out = _plugin.fresnel_shlick_fwd(f0, f90, cosTheta, False) 74 | ctx.save_for_backward(f0, f90, cosTheta) 75 | return out 76 | 77 | @staticmethod 78 | def backward(ctx, dout): 79 | f0, f90, cosTheta = ctx.saved_variables 80 | return _plugin.fresnel_shlick_bwd(f0, f90, cosTheta, dout) + (None,) 81 | 82 | def _fresnel_shlick(f0, f90, cosTheta, use_python=False): 83 | if use_python: 84 | out = bsdf_fresnel_shlick(f0, f90, cosTheta) 85 | else: 86 | out = _fresnel_shlick_func.apply(f0, f90, cosTheta) 87 | 88 | if torch.is_anomaly_enabled(): 89 | assert torch.all(torch.isfinite(out)), "Output of _fresnel_shlick contains inf or NaN" 90 | return out 91 | 92 | 93 | class _ndf_ggx_func(torch.autograd.Function): 94 | @staticmethod 95 | def forward(ctx, alphaSqr, cosTheta): 96 | out = _plugin.ndf_ggx_fwd(alphaSqr, cosTheta, False) 97 | ctx.save_for_backward(alphaSqr, cosTheta) 98 | return out 99 | 100 | @staticmethod 101 | def backward(ctx, dout): 102 | alphaSqr, cosTheta = ctx.saved_variables 103 | return _plugin.ndf_ggx_bwd(alphaSqr, cosTheta, dout) + (None,) 104 | 105 | def _ndf_ggx(alphaSqr, cosTheta, use_python=False): 106 | if use_python: 107 | out = bsdf_ndf_ggx(alphaSqr, cosTheta) 108 | else: 109 | out = _ndf_ggx_func.apply(alphaSqr, cosTheta) 110 | 111 | if torch.is_anomaly_enabled(): 112 | assert torch.all(torch.isfinite(out)), "Output of _ndf_ggx contains inf or NaN" 113 | return out 114 | 115 | class _lambda_ggx_func(torch.autograd.Function): 116 | @staticmethod 117 | def forward(ctx, alphaSqr, cosTheta): 118 | out = _plugin.lambda_ggx_fwd(alphaSqr, cosTheta, False) 119 | ctx.save_for_backward(alphaSqr, cosTheta) 120 | return out 121 | 122 | @staticmethod 123 | def backward(ctx, dout): 124 | alphaSqr, cosTheta = ctx.saved_variables 125 | return _plugin.lambda_ggx_bwd(alphaSqr, cosTheta, dout) + (None,) 126 | 127 | def _lambda_ggx(alphaSqr, cosTheta, use_python=False): 128 | if use_python: 129 | out = bsdf_lambda_ggx(alphaSqr, cosTheta) 130 | else: 131 | out = _lambda_ggx_func.apply(alphaSqr, cosTheta) 132 | 133 | if torch.is_anomaly_enabled(): 134 | assert torch.all(torch.isfinite(out)), "Output of _lambda_ggx contains inf or NaN" 135 | return out 136 | 137 | class _masking_smith_func(torch.autograd.Function): 138 | @staticmethod 139 | def forward(ctx, alphaSqr, cosThetaI, cosThetaO): 140 | ctx.save_for_backward(alphaSqr, cosThetaI, cosThetaO) 141 | out = _plugin.masking_smith_fwd(alphaSqr, cosThetaI, cosThetaO, False) 142 | return out 143 | 144 | @staticmethod 145 | def backward(ctx, dout): 146 | alphaSqr, cosThetaI, cosThetaO = ctx.saved_variables 147 | return _plugin.masking_smith_bwd(alphaSqr, cosThetaI, cosThetaO, dout) + (None,) 148 | 149 | def _masking_smith(alphaSqr, cosThetaI, cosThetaO, use_python=False): 150 | if use_python: 151 | out = bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO) 152 | else: 153 | out = _masking_smith_func.apply(alphaSqr, 
cosThetaI, cosThetaO) 154 | 155 | if torch.is_anomaly_enabled(): 156 | assert torch.all(torch.isfinite(out)), "Output of _masking_smith contains inf or NaN" 157 | return out 158 | 159 | #---------------------------------------------------------------------------- 160 | # Shading normal setup (bump mapping + bent normals) 161 | 162 | class _prepare_shading_normal_func(torch.autograd.Function): 163 | @staticmethod 164 | def forward(ctx, pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl): 165 | ctx.two_sided_shading, ctx.opengl = two_sided_shading, opengl 166 | out = _plugin.prepare_shading_normal_fwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl, False) 167 | ctx.save_for_backward(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm) 168 | return out 169 | 170 | @staticmethod 171 | def backward(ctx, dout): 172 | pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm = ctx.saved_variables 173 | return _plugin.prepare_shading_normal_bwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, dout, ctx.two_sided_shading, ctx.opengl) + (None, None, None) 174 | 175 | def prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading=True, opengl=True, use_python=False): 176 | '''Takes care of all corner cases and produces a final normal used for shading: 177 | - Constructs tangent space 178 | - Flips normal direction based on geometric normal for two-sided shading 179 | - Perturbs shading normal by normal map 180 | - Bends backfacing normals towards the camera to avoid shading artifacts 181 | 182 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent. 183 | 184 | Args: 185 | pos: World space g-buffer position. 186 | view_pos: Camera position in world space (typically using broadcasting). 187 | perturbed_nrm: Tangent-space normal perturbation from normal map lookup. 188 | smooth_nrm: Interpolated vertex normals. 189 | smooth_tng: Interpolated vertex tangents. 190 | geom_nrm: Geometric (face) normals. 191 | two_sided_shading: Use one/two sided shading 192 | opengl: Use OpenGL/DirectX normal map conventions 193 | use_python: Use PyTorch implementation (for validation) 194 | Returns: 195 | Final shading normal 196 | ''' 197 | 198 | if perturbed_nrm is None: 199 | perturbed_nrm = torch.tensor([0, 0, 1], dtype=torch.float32, device='cuda', requires_grad=False)[None, None, None, ...] 200 | 201 | if use_python: 202 | out = bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl) 203 | else: 204 | out = _prepare_shading_normal_func.apply(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl) 205 | 206 | if torch.is_anomaly_enabled(): 207 | assert torch.all(torch.isfinite(out)), "Output of prepare_shading_normal contains inf or NaN" 208 | return out 209 | 210 | #---------------------------------------------------------------------------- 211 | # BSDF functions 212 | 213 | class _lambert_func(torch.autograd.Function): 214 | @staticmethod 215 | def forward(ctx, nrm, wi): 216 | out = _plugin.lambert_fwd(nrm, wi, False) 217 | ctx.save_for_backward(nrm, wi) 218 | return out 219 | 220 | @staticmethod 221 | def backward(ctx, dout): 222 | nrm, wi = ctx.saved_variables 223 | return _plugin.lambert_bwd(nrm, wi, dout) + (None,) 224 | 225 | def lambert(nrm, wi, use_python=False): 226 | '''Lambertian bsdf.
227 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent. 228 | 229 | Args: 230 | nrm: World space shading normal. 231 | wi: World space light vector. 232 | use_python: Use PyTorch implementation (for validation) 233 | 234 | Returns: 235 | Shaded diffuse value with shape [minibatch_size, height, width, 1] 236 | ''' 237 | 238 | if use_python: 239 | out = bsdf_lambert(nrm, wi) 240 | else: 241 | out = _lambert_func.apply(nrm, wi) 242 | 243 | if torch.is_anomaly_enabled(): 244 | assert torch.all(torch.isfinite(out)), "Output of lambert contains inf or NaN" 245 | return out 246 | 247 | class _pbr_specular_func(torch.autograd.Function): 248 | @staticmethod 249 | def forward(ctx, col, nrm, wo, wi, alpha, min_roughness): 250 | ctx.save_for_backward(col, nrm, wo, wi, alpha) 251 | ctx.min_roughness = min_roughness 252 | out = _plugin.pbr_specular_fwd(col, nrm, wo, wi, alpha, min_roughness, False) 253 | return out 254 | 255 | @staticmethod 256 | def backward(ctx, dout): 257 | col, nrm, wo, wi, alpha = ctx.saved_variables 258 | return _plugin.pbr_specular_bwd(col, nrm, wo, wi, alpha, ctx.min_roughness, dout) + (None, None) 259 | 260 | def pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08, use_python=False): 261 | '''Physically-based specular bsdf. 262 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 263 | 264 | Args: 265 | col: Specular lobe color 266 | nrm: World space shading normal. 267 | wo: World space camera vector. 268 | wi: World space light vector 269 | alpha: Specular roughness parameter with shape [minibatch_size, height, width, 1] 270 | min_roughness: Scalar roughness clamping threshold 271 | 272 | use_python: Use PyTorch implementation (for validation) 273 | Returns: 274 | Shaded specular color 275 | ''' 276 | 277 | if use_python: 278 | out = bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=min_roughness) 279 | else: 280 | out = _pbr_specular_func.apply(col, nrm, wo, wi, alpha, min_roughness) 281 | 282 | if torch.is_anomaly_enabled(): 283 | assert torch.all(torch.isfinite(out)), "Output of pbr_specular contains inf or NaN" 284 | return out 285 | 286 | class _pbr_bsdf_func(torch.autograd.Function): 287 | @staticmethod 288 | def forward(ctx, kd, arm, pos, nrm, view_pos, light_pos, min_roughness): 289 | ctx.save_for_backward(kd, arm, pos, nrm, view_pos, light_pos) 290 | ctx.min_roughness = min_roughness 291 | out = _plugin.pbr_bsdf_fwd(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, False) 292 | return out 293 | 294 | @staticmethod 295 | def backward(ctx, dout): 296 | kd, arm, pos, nrm, view_pos, light_pos = ctx.saved_variables 297 | return _plugin.pbr_bsdf_bwd(kd, arm, pos, nrm, view_pos, light_pos, ctx.min_roughness, dout) + (None, None) 298 | 299 | def pbr_bsdf(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08, use_python=False): 300 | '''Physically-based bsdf, both diffuse & specular lobes 301 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 302 | 303 | Args: 304 | kd: Diffuse albedo. 305 | arm: Specular parameters (attenuation, linear roughness, metalness). 306 | pos: World space position. 307 | nrm: World space shading normal. 308 | view_pos: Camera position in world space, typically using broadcasting. 309 | light_pos: Light position in world space, typically using broadcasting. 
310 | min_roughness: Scalar roughness clamping threshold 311 | 312 | use_python: Use PyTorch implementation (for validation) 313 | 314 | Returns: 315 | Shaded color. 316 | ''' 317 | 318 | if use_python: 319 | out = bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=min_roughness) 320 | else: 321 | out = _pbr_bsdf_func.apply(kd, arm, pos, nrm, view_pos, light_pos, min_roughness) 322 | 323 | if torch.is_anomaly_enabled(): 324 | assert torch.all(torch.isfinite(out)), "Output of pbr_bsdf contains inf or NaN" 325 | return out 326 | 327 | #---------------------------------------------------------------------------- 328 | # Fast image loss function 329 | 330 | class _image_loss_func(torch.autograd.Function): 331 | @staticmethod 332 | def forward(ctx, img, target, loss, tonemapper): 333 | ctx.loss, ctx.tonemapper = loss, tonemapper 334 | ctx.save_for_backward(img, target) 335 | out = _plugin.image_loss_fwd(img, target, loss, tonemapper, False) 336 | return out 337 | 338 | @staticmethod 339 | def backward(ctx, dout): 340 | img, target = ctx.saved_variables 341 | return _plugin.image_loss_bwd(img, target, dout, ctx.loss, ctx.tonemapper) + (None, None, None) 342 | 343 | def image_loss(img, target, loss='l1', tonemapper='none', use_python=False): 344 | '''Compute HDR image loss. Combines tonemapping and loss into a single kernel for better perf. 345 | All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent unless otherwise noted. 346 | 347 | Args: 348 | img: Input image. 349 | target: Target (reference) image. 350 | loss: Type of loss. Valid options are ['l1', 'mse', 'smape', 'relmse'] 351 | tonemapper: Tonemapping operations. Valid options are ['none', 'log_srgb'] 352 | use_python: Use PyTorch implementation (for validation) 353 | 354 | Returns: 355 | Image space loss (scalar value). 356 | ''' 357 | if use_python: 358 | out = image_loss_fn(img, target, loss, tonemapper) 359 | else: 360 | out = _image_loss_func.apply(img, target, loss, tonemapper) 361 | out = torch.sum(out) / (img.shape[0]*img.shape[1]*img.shape[2]) 362 | 363 | if torch.is_anomaly_enabled(): 364 | assert torch.all(torch.isfinite(out)), "Output of image_loss contains inf or NaN" 365 | return out 366 | 367 | #---------------------------------------------------------------------------- 368 | # Transform points function 369 | 370 | class _xfm_func(torch.autograd.Function): 371 | @staticmethod 372 | def forward(ctx, points, matrix, isPoints): 373 | ctx.save_for_backward(points, matrix) 374 | ctx.isPoints = isPoints 375 | out = _plugin.xfm_fwd(points, matrix, isPoints, False) 376 | return out 377 | 378 | @staticmethod 379 | def backward(ctx, dout): 380 | points, matrix = ctx.saved_variables 381 | return (_plugin.xfm_bwd(points, matrix, dout, ctx.isPoints),) + (None, None, None) 382 | 383 | def xfm_points(points, matrix, use_python=False): 384 | '''Transform points. 385 | Note: this method does not back-propagate matrix gradients by default for performance reasons. For matrix gradients, 386 | enable use_python=True or use torch.matmul instead. 387 | 388 | Args: 389 | points: Tensor containing 3D points with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3] 390 | matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4] 391 | use_python: Use PyTorch's torch.matmul (for validation) 392 | Returns: 393 | Transformed points in homogeneous 4D with shape [minibatch_size, num_vertices, 4]. 
394 | ''' 395 | if use_python: 396 | out = torch.matmul(torch.nn.functional.pad(points, pad=(0,1), mode='constant', value=1.0), torch.transpose(matrix, 1, 2)) 397 | else: 398 | out = _xfm_func.apply(points, matrix, True) 399 | 400 | if torch.is_anomaly_enabled(): 401 | assert torch.all(torch.isfinite(out)), "Output of xfm_points contains inf or NaN" 402 | return out 403 | 404 | def xfm_vectors(vectors, matrix, use_python=False): 405 | '''Transform vectors. 406 | Note: this method does not back-propagate matrix gradients by default for performance reasons. For matrix gradients, 407 | enable use_python=True or use torch.matmul instead. 408 | 409 | Args: 410 | vectors: Tensor containing 3D vectors with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3] 411 | matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4] 412 | use_python: Use PyTorch's torch.matmul (for validation) 413 | 414 | Returns: 415 | Transformed vectors with shape [minibatch_size, num_vertices, 3] (the homogeneous w component is dropped). 416 | ''' 417 | 418 | if use_python: 419 | out = torch.matmul(torch.nn.functional.pad(vectors, pad=(0,1), mode='constant', value=0.0), torch.transpose(matrix, 1, 2))[..., 0:3].contiguous() 420 | else: 421 | out = _xfm_func.apply(vectors, matrix, False) 422 | 423 | if torch.is_anomaly_enabled(): 424 | assert torch.all(torch.isfinite(out)), "Output of xfm_vectors contains inf or NaN" 425 | return out 426 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
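Before the unit tests, a minimal usage sketch of the ops module above (an illustration under assumptions, not repo code: it assumes `renderutils` is importable the way these tests arrange it, and note that importing the package already triggers the CUDA plugin build even when use_python=True routes execution through the PyTorch reference path):

import torch
import renderutils as ru  # assumes sys.path is set up as in the tests below

# Diffuse term via the PyTorch reference implementation.
nrm = torch.rand(1, 4, 4, 3, device='cuda')
wi = torch.rand(1, 4, 4, 3, device='cuda')
diffuse = ru.lambert(nrm, wi, use_python=True)  # shape [1, 4, 4, 1]

# HDR image loss with tonemapping folded in.
img = torch.rand(1, 4, 4, 3, device='cuda')
target = torch.rand(1, 4, 4, 3, device='cuda')
loss = ru.image_loss(img, target, loss='smape', tonemapper='log_srgb', use_python=True)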
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 4 18 | DTYPE = torch.float32 19 | 20 | def relative_loss(name, ref, cuda): 21 | ref = ref.float() 22 | cuda = cuda.float() 23 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 24 | 25 | def test_normal(): 26 | pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 27 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 28 | view_pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 29 | view_pos_ref = view_pos_cuda.clone().detach().requires_grad_(True) 30 | perturbed_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 31 | perturbed_nrm_ref = perturbed_nrm_cuda.clone().detach().requires_grad_(True) 32 | smooth_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 33 | smooth_nrm_ref = smooth_nrm_cuda.clone().detach().requires_grad_(True) 34 | smooth_tng_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 35 | smooth_tng_ref = smooth_tng_cuda.clone().detach().requires_grad_(True) 36 | geom_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | geom_nrm_ref = geom_nrm_cuda.clone().detach().requires_grad_(True) 38 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 39 | 40 | ref = ru.prepare_shading_normal(pos_ref, view_pos_ref, perturbed_nrm_ref, smooth_nrm_ref, smooth_tng_ref, geom_nrm_ref, True, use_python=True) 41 | ref_loss = torch.nn.MSELoss()(ref, target) 42 | ref_loss.backward() 43 | 44 | cuda = ru.prepare_shading_normal(pos_cuda, view_pos_cuda, perturbed_nrm_cuda, smooth_nrm_cuda, smooth_tng_cuda, geom_nrm_cuda, True) 45 | cuda_loss = torch.nn.MSELoss()(cuda, target) 46 | cuda_loss.backward() 47 | 48 | print("-------------------------------------------------------------") 49 | print(" bent normal") 50 | print("-------------------------------------------------------------") 51 | relative_loss("res:", ref, cuda) 52 | relative_loss("pos:", pos_ref.grad, pos_cuda.grad) 53 | relative_loss("view_pos:", view_pos_ref.grad, view_pos_cuda.grad) 54 | relative_loss("perturbed_nrm:", perturbed_nrm_ref.grad, perturbed_nrm_cuda.grad) 55 | relative_loss("smooth_nrm:", smooth_nrm_ref.grad, smooth_nrm_cuda.grad) 56 | relative_loss("smooth_tng:", smooth_tng_ref.grad, smooth_tng_cuda.grad) 57 | relative_loss("geom_nrm:", geom_nrm_ref.grad, geom_nrm_cuda.grad) 58 | 59 | def test_schlick(): 60 | f0_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 61 | f0_ref = f0_cuda.clone().detach().requires_grad_(True) 62 | f90_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 63 | f90_ref = f90_cuda.clone().detach().requires_grad_(True) 64 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 2.0 65 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 66 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 67 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 68 | 69 | ref = ru._fresnel_shlick(f0_ref, f90_ref, cosT_ref, use_python=True) 70 | ref_loss = torch.nn.MSELoss()(ref, target) 71 | ref_loss.backward() 72 | 73 | cuda = ru._fresnel_shlick(f0_cuda, f90_cuda, cosT_cuda) 74 | cuda_loss = torch.nn.MSELoss()(cuda, target) 75 | cuda_loss.backward() 76 | 77 | 
print("-------------------------------------------------------------") 78 | print(" Fresnel shlick") 79 | print("-------------------------------------------------------------") 80 | relative_loss("res:", ref, cuda) 81 | relative_loss("f0:", f0_ref.grad, f0_cuda.grad) 82 | relative_loss("f90:", f90_ref.grad, f90_cuda.grad) 83 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 84 | 85 | def test_ndf_ggx(): 86 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 87 | alphaSqr_cuda = alphaSqr_cuda.clone().detach().requires_grad_(True) 88 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 89 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1 90 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 91 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 92 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 93 | 94 | ref = ru._ndf_ggx(alphaSqr_ref, cosT_ref, use_python=True) 95 | ref_loss = torch.nn.MSELoss()(ref, target) 96 | ref_loss.backward() 97 | 98 | cuda = ru._ndf_ggx(alphaSqr_cuda, cosT_cuda) 99 | cuda_loss = torch.nn.MSELoss()(cuda, target) 100 | cuda_loss.backward() 101 | 102 | print("-------------------------------------------------------------") 103 | print(" Ndf GGX") 104 | print("-------------------------------------------------------------") 105 | relative_loss("res:", ref, cuda) 106 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 107 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 108 | 109 | def test_lambda_ggx(): 110 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 111 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 112 | cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1 113 | cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True) 114 | cosT_ref = cosT_cuda.clone().detach().requires_grad_(True) 115 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 116 | 117 | ref = ru._lambda_ggx(alphaSqr_ref, cosT_ref, use_python=True) 118 | ref_loss = torch.nn.MSELoss()(ref, target) 119 | ref_loss.backward() 120 | 121 | cuda = ru._lambda_ggx(alphaSqr_cuda, cosT_cuda) 122 | cuda_loss = torch.nn.MSELoss()(cuda, target) 123 | cuda_loss.backward() 124 | 125 | print("-------------------------------------------------------------") 126 | print(" Lambda GGX") 127 | print("-------------------------------------------------------------") 128 | relative_loss("res:", ref, cuda) 129 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 130 | relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad) 131 | 132 | def test_masking_smith(): 133 | alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 134 | alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True) 135 | cosThetaI_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 136 | cosThetaI_ref = cosThetaI_cuda.clone().detach().requires_grad_(True) 137 | cosThetaO_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 138 | cosThetaO_ref = cosThetaO_cuda.clone().detach().requires_grad_(True) 139 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 140 | 141 | ref = ru._masking_smith(alphaSqr_ref, cosThetaI_ref, cosThetaO_ref, use_python=True) 142 | ref_loss = torch.nn.MSELoss()(ref, target) 143 | ref_loss.backward() 144 | 145 | cuda = 
ru._masking_smith(alphaSqr_cuda, cosThetaI_cuda, cosThetaO_cuda) 146 | cuda_loss = torch.nn.MSELoss()(cuda, target) 147 | cuda_loss.backward() 148 | 149 | print("-------------------------------------------------------------") 150 | print(" Smith masking term") 151 | print("-------------------------------------------------------------") 152 | relative_loss("res:", ref, cuda) 153 | relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad) 154 | relative_loss("cosThetaI:", cosThetaI_ref.grad, cosThetaI_cuda.grad) 155 | relative_loss("cosThetaO:", cosThetaO_ref.grad, cosThetaO_cuda.grad) 156 | 157 | def test_lambert(): 158 | normals_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 159 | normals_ref = normals_cuda.clone().detach().requires_grad_(True) 160 | wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 161 | wi_ref = wi_cuda.clone().detach().requires_grad_(True) 162 | target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda') 163 | 164 | ref = ru.lambert(normals_ref, wi_ref, use_python=True) 165 | ref_loss = torch.nn.MSELoss()(ref, target) 166 | ref_loss.backward() 167 | 168 | cuda = ru.lambert(normals_cuda, wi_cuda) 169 | cuda_loss = torch.nn.MSELoss()(cuda, target) 170 | cuda_loss.backward() 171 | 172 | print("-------------------------------------------------------------") 173 | print(" Lambert") 174 | print("-------------------------------------------------------------") 175 | relative_loss("res:", ref, cuda) 176 | relative_loss("nrm:", normals_ref.grad, normals_cuda.grad) 177 | relative_loss("wi:", wi_ref.grad, wi_cuda.grad) 178 | 179 | def test_pbr_specular(): 180 | col_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 181 | col_ref = col_cuda.clone().detach().requires_grad_(True) 182 | nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 183 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 184 | wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 185 | wi_ref = wi_cuda.clone().detach().requires_grad_(True) 186 | wo_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 187 | wo_ref = wo_cuda.clone().detach().requires_grad_(True) 188 | alpha_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) 189 | alpha_ref = alpha_cuda.clone().detach().requires_grad_(True) 190 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 191 | 192 | ref = ru.pbr_specular(col_ref, nrm_ref, wo_ref, wi_ref, alpha_ref, use_python=True) 193 | ref_loss = torch.nn.MSELoss()(ref, target) 194 | ref_loss.backward() 195 | 196 | cuda = ru.pbr_specular(col_cuda, nrm_cuda, wo_cuda, wi_cuda, alpha_cuda) 197 | cuda_loss = torch.nn.MSELoss()(cuda, target) 198 | cuda_loss.backward() 199 | 200 | print("-------------------------------------------------------------") 201 | print(" Pbr specular") 202 | print("-------------------------------------------------------------") 203 | 204 | relative_loss("res:", ref, cuda) 205 | if col_ref.grad is not None: 206 | relative_loss("col:", col_ref.grad, col_cuda.grad) 207 | if nrm_ref.grad is not None: 208 | relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad) 209 | if wi_ref.grad is not None: 210 | relative_loss("wi:", wi_ref.grad, wi_cuda.grad) 211 | if wo_ref.grad is not None: 212 | relative_loss("wo:", wo_ref.grad, wo_cuda.grad) 213 | if alpha_ref.grad is not None: 214 | relative_loss("alpha:", alpha_ref.grad, alpha_cuda.grad) 215 | 216 | 
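Every test in this file repeats the same `clone().detach().requires_grad_(True)` idiom; a short sketch of what it buys (hypothetical helper, not part of the repo):

import torch

def leaf_copy(t: torch.Tensor) -> torch.Tensor:
    # Independent leaf tensor with the same values, so the reference and
    # CUDA paths accumulate gradients separately from identical inputs.
    return t.clone().detach().requires_grad_(True)

a = torch.rand(2, 2, requires_grad=True)
b = leaf_copy(a)
(b * 2).sum().backward()
assert b.grad is not None and a.grad is None  # the two graphs are decoupled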
def test_pbr_bsdf(): 217 | kd_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 218 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 219 | arm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 220 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 221 | pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 222 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 223 | nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 224 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 225 | view_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 226 | view_ref = view_cuda.clone().detach().requires_grad_(True) 227 | light_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 228 | light_ref = light_cuda.clone().detach().requires_grad_(True) 229 | target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda') 230 | 231 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 232 | ref_loss = torch.nn.MSELoss()(ref, target) 233 | ref_loss.backward() 234 | 235 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 236 | cuda_loss = torch.nn.MSELoss()(cuda, target) 237 | cuda_loss.backward() 238 | 239 | print("-------------------------------------------------------------") 240 | print(" Pbr BSDF") 241 | print("-------------------------------------------------------------") 242 | 243 | relative_loss("res:", ref, cuda) 244 | if kd_ref.grad is not None: 245 | relative_loss("kd:", kd_ref.grad, kd_cuda.grad) 246 | if arm_ref.grad is not None: 247 | relative_loss("arm:", arm_ref.grad, arm_cuda.grad) 248 | if pos_ref.grad is not None: 249 | relative_loss("pos:", pos_ref.grad, pos_cuda.grad) 250 | if nrm_ref.grad is not None: 251 | relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad) 252 | if view_ref.grad is not None: 253 | relative_loss("view:", view_ref.grad, view_cuda.grad) 254 | if light_ref.grad is not None: 255 | relative_loss("light:", light_ref.grad, light_cuda.grad) 256 | 257 | test_normal() 258 | 259 | test_schlick() 260 | test_ndf_ggx() 261 | test_lambda_ggx() 262 | test_masking_smith() 263 | 264 | test_lambert() 265 | test_pbr_specular() 266 | test_pbr_bsdf() 267 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | RES = 8 18 | DTYPE = torch.float32 19 | 20 | def tonemap_srgb(f): 21 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 22 | 23 | def l1(output, target): 24 | x = torch.clamp(output, min=0, max=65535) 25 | r = torch.clamp(target, min=0, max=65535) 26 | x = tonemap_srgb(torch.log(x + 1)) 27 | r = tonemap_srgb(torch.log(r + 1)) 28 | return torch.nn.functional.l1_loss(x,r) 29 | 30 | def relative_loss(name, ref, cuda): 31 | ref = ref.float() 32 | cuda = cuda.float() 33 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).item()) 34 | 35 | def test_loss(loss, tonemapper): 36 | img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 37 | img_ref = img_cuda.clone().detach().requires_grad_(True) 38 | target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 39 | target_ref = target_cuda.clone().detach().requires_grad_(True) 40 | 41 | ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True) 42 | ref_loss.backward() 43 | 44 | cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper) 45 | cuda_loss.backward() 46 | 47 | print("-------------------------------------------------------------") 48 | print(" Loss: %s, %s" % (loss, tonemapper)) 49 | print("-------------------------------------------------------------") 50 | 51 | relative_loss("res:", ref_loss, cuda_loss) 52 | relative_loss("img:", img_ref.grad, img_cuda.grad) 53 | relative_loss("target:", target_ref.grad, target_cuda.grad) 54 | 55 | 56 | test_loss('l1', 'none') 57 | test_loss('l1', 'log_srgb') 58 | test_loss('mse', 'log_srgb') 59 | test_loss('smape', 'none') 60 | test_loss('relmse', 'none') 61 | test_loss('mse', 'none') -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_mesh.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
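test_mesh.py below validates xfm_points and xfm_vectors; their reference semantics, restated as a standalone sketch mirroring the use_python branch in ops.py above:

import torch

points = torch.rand(1, 5, 3)           # [minibatch, num_vertices, 3]
matrix = torch.eye(4).expand(2, 4, 4)  # [minibatch, 4, 4]

# Points pad w=1 so the translation column applies; vectors pad w=0 instead.
hom = torch.nn.functional.pad(points, pad=(0, 1), mode='constant', value=1.0)
out = torch.matmul(hom, matrix.transpose(1, 2))  # batch dim broadcasts 1 -> 2
assert out.shape == (2, 5, 4)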
8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 15 | import renderutils as ru 16 | 17 | BATCH = 8 18 | RES = 1024 19 | DTYPE = torch.float32 20 | 21 | torch.manual_seed(0) 22 | 23 | def tonemap_srgb(f): 24 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 25 | 26 | def l1(output, target): 27 | x = torch.clamp(output, min=0, max=65535) 28 | r = torch.clamp(target, min=0, max=65535) 29 | x = tonemap_srgb(torch.log(x + 1)) 30 | r = tonemap_srgb(torch.log(r + 1)) 31 | return torch.nn.functional.l1_loss(x,r) 32 | 33 | def relative_loss(name, ref, cuda): 34 | ref = ref.float() 35 | cuda = cuda.float() 36 | print(name, torch.max(torch.abs(ref - cuda) / torch.abs(ref)).item()) 37 | 38 | def test_xfm_points(): 39 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 40 | points_ref = points_cuda.clone().detach().requires_grad_(True) 41 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 42 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 43 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 44 | 45 | ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True) 46 | ref_loss = torch.nn.MSELoss()(ref_out, target) 47 | ref_loss.backward() 48 | 49 | cuda_out = ru.xfm_points(points_cuda, mtx_cuda) 50 | cuda_loss = torch.nn.MSELoss()(cuda_out, target) 51 | cuda_loss.backward() 52 | 53 | print("-------------------------------------------------------------") 54 | 55 | relative_loss("res:", ref_out, cuda_out) 56 | relative_loss("points:", points_ref.grad, points_cuda.grad) 57 | 58 | def test_xfm_vectors(): 59 | points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 60 | points_ref = points_cuda.clone().detach().requires_grad_(True) 61 | points_cuda_p = points_cuda.clone().detach().requires_grad_(True) 62 | points_ref_p = points_cuda.clone().detach().requires_grad_(True) 63 | mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False) 64 | mtx_ref = mtx_cuda.clone().detach().requires_grad_(True) 65 | target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True) 66 | 67 | ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True) 68 | ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3]) 69 | ref_loss.backward() 70 | 71 | cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda) 72 | cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3]) 73 | cuda_loss.backward() 74 | 75 | ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True) 76 | ref_loss_p = torch.nn.MSELoss()(ref_out_p, target) 77 | ref_loss_p.backward() 78 | 79 | cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda) 80 | cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target) 81 | cuda_loss_p.backward() 82 | 83 | print("-------------------------------------------------------------") 84 | 85 | relative_loss("res:", ref_out, cuda_out) 86 | relative_loss("points:", points_ref.grad, points_cuda.grad) 87 | relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad) 88 | 89 | test_xfm_points() 90 | test_xfm_vectors() 91 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/renderutils/tests/test_perf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, 
NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import torch 11 | 12 | import os 13 | import sys 14 | import time 15 | sys.path.insert(0, os.path.join(sys.path[0], '../..')) 16 | import renderutils as ru 17 | 18 | DTYPE=torch.float32 19 | 20 | def test_bsdf(BATCH, RES, ITR): 21 | kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 22 | kd_ref = kd_cuda.clone().detach().requires_grad_(True) 23 | arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 24 | arm_ref = arm_cuda.clone().detach().requires_grad_(True) 25 | pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 26 | pos_ref = pos_cuda.clone().detach().requires_grad_(True) 27 | nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 28 | nrm_ref = nrm_cuda.clone().detach().requires_grad_(True) 29 | view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 30 | view_ref = view_cuda.clone().detach().requires_grad_(True) 31 | light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True) 32 | light_ref = light_cuda.clone().detach().requires_grad_(True) 33 | target = torch.rand(BATCH, RES, RES, 3, device='cuda') 34 | 35 | start = torch.cuda.Event(enable_timing=True) 36 | end = torch.cuda.Event(enable_timing=True) 37 | 38 | ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 39 | 40 | print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES)) 41 | 42 | start.record() 43 | for i in range(ITR): 44 | ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True) 45 | end.record() 46 | torch.cuda.synchronize() 47 | print("Pbr BSDF python:", start.elapsed_time(end)) 48 | 49 | start.record() 50 | for i in range(ITR): 51 | cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda) 52 | end.record() 53 | torch.cuda.synchronize() 54 | print("Pbr BSDF cuda:", start.elapsed_time(end)) 55 | 56 | test_bsdf(1, 512, 1000) 57 | test_bsdf(16, 512, 1000) 58 | test_bsdf(1, 2048, 1000) 59 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/texture.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | import nvdiffrast.torch as dr 13 | 14 | from . import util 15 | 16 | ######################################################################################################## 17 | # Simple texture class. 
A texture can be either 18 | # - A 3D tensor (using auto mipmaps) 19 | # - A list of 3D tensors (full custom mip hierarchy) 20 | ######################################################################################################## 21 | 22 | class Texture2D: 23 | # Initializes a texture from image data. 24 | # Input can be constant value (1D array) or texture (3D array) or mip hierarchy (list of 3d arrays) 25 | def __init__(self, init): 26 | if isinstance(init, np.ndarray): 27 | init = torch.tensor(init, dtype=torch.float32, device='cuda') 28 | elif isinstance(init, list) and len(init) == 1: 29 | init = init[0] 30 | 31 | if isinstance(init, list) or len(init.shape) == 4: 32 | self.data = init 33 | elif len(init.shape) == 3: 34 | self.data = init[None, ...] 35 | else: 36 | self.data = init[None, None, None, :] # Convert constant to 1x1 tensor 37 | 38 | # Filtered (trilinear) sample texture at a given location 39 | def sample(self, texc, texc_deriv, filter_mode='linear-mipmap-linear', data_fmt=torch.float32): 40 | if isinstance(self.data, list): 41 | out = dr.texture(self.data[0], texc, texc_deriv, mip=self.data[1:], filter_mode=filter_mode) 42 | else: 43 | out = dr.texture(self.data, texc, texc_deriv, filter_mode=filter_mode) 44 | return out.to(data_fmt) 45 | 46 | def getRes(self): 47 | return self.getMips()[0].shape[1:3] 48 | 49 | def getMips(self): 50 | if isinstance(self.data, list): 51 | return self.data 52 | else: 53 | return [self.data] 54 | 55 | # In-place clamp with no derivative to make sure values are in valid range after training 56 | def clamp_(self, min=None, max=None): 57 | with torch.no_grad(): 58 | for mip in self.getMips(): 59 | mip.clamp_(min=min, max=max) 60 | 61 | # In-place clamp with no derivative to make sure values are in valid range after training 62 | def clamp_rgb_(self, minR=None, maxR=None, minG=None, maxG=None, minB=None, maxB=None): 63 | with torch.no_grad(): 64 | for mip in self.getMips(): 65 | mip[...,0].clamp_(min=minR, max=maxR) 66 | mip[...,1].clamp_(min=minG, max=maxG) 67 | mip[...,2].clamp_(min=minB, max=maxB) 68 | 69 | ######################################################################################################## 70 | # Helper function to create a trainable texture from a regular texture. The trainable weights are 71 | # initialized with texture data as an initial guess 72 | ######################################################################################################## 73 | 74 | def create_trainable(init, res, auto_mipmaps): 75 | with torch.no_grad(): 76 | if isinstance(init, Texture2D): 77 | assert isinstance(init.data, torch.Tensor) 78 | init = init.data 79 | elif isinstance(init, np.ndarray): 80 | init = torch.tensor(init, dtype=torch.float32, device='cuda') 81 | 82 | # Pad to NHWC if needed 83 | if len(init.shape) == 1: # Extend constant to NHWC tensor 84 | init = init[None, None, None, :] 85 | elif len(init.shape) == 3: 86 | init = init[None, ...] 87 | 88 | # Scale input to desired resolution. 
89 | init = util.scale_img_nhwc(init, res) 90 | 91 | # Generate custom mip chain 92 | if not auto_mipmaps: 93 | mip_chain = [init.clone().detach().requires_grad_(True)] 94 | while mip_chain[-1].shape[1] > 1 or mip_chain[-1].shape[2] > 1: 95 | new_size = [max(mip_chain[-1].shape[1] // 2, 1), max(mip_chain[-1].shape[2] // 2, 1)] 96 | init = util.scale_img_nhwc(mip_chain[-1], new_size) 97 | mip_chain += [init.clone().detach().requires_grad_(True)] 98 | return Texture2D(mip_chain) 99 | else: 100 | return Texture2D(init.clone().detach().requires_grad_(True)) 101 | 102 | ######################################################################################################## 103 | # Convert texture to and from SRGB 104 | ######################################################################################################## 105 | 106 | def srgb_to_rgb(texture): 107 | return Texture2D(list(util.srgb_to_rgb(mip) for mip in texture.getMips())) 108 | 109 | def rgb_to_srgb(texture): 110 | return Texture2D(list(util.rgb_to_srgb(mip) for mip in texture.getMips())) 111 | 112 | ######################################################################################################## 113 | # Utility functions for loading / storing a texture 114 | ######################################################################################################## 115 | 116 | def _load_mip2D(fn, lambda_fn=None, channels=None): 117 | imgdata = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda') 118 | if channels is not None: 119 | imgdata = imgdata[..., 0:channels] 120 | if lambda_fn is not None: 121 | imgdata = lambda_fn(imgdata) 122 | return imgdata.detach().clone() 123 | 124 | def load_texture2D(fn, lambda_fn=None, channels=None): 125 | base, ext = os.path.splitext(fn) 126 | if os.path.exists(base + "_0" + ext): 127 | mips = [] 128 | while os.path.exists(base + ("_%d" % len(mips)) + ext): 129 | mips += [_load_mip2D(base + ("_%d" % len(mips)) + ext, lambda_fn, channels)] 130 | return Texture2D(mips) 131 | else: 132 | return Texture2D(_load_mip2D(fn, lambda_fn, channels)) 133 | 134 | def _save_mip2D(fn, mip, mipidx, lambda_fn): 135 | if lambda_fn is not None: 136 | data = lambda_fn(mip).detach().cpu().numpy() 137 | else: 138 | data = mip.detach().cpu().numpy() 139 | 140 | if mipidx is None: 141 | util.save_image(fn, data) 142 | else: 143 | base, ext = os.path.splitext(fn) 144 | util.save_image(base + ("_%d" % mipidx) + ext, data) 145 | 146 | def save_texture2D(fn, tex, lambda_fn=None): 147 | if isinstance(tex.data, list): 148 | for i, mip in enumerate(tex.data): 149 | _save_mip2D(fn, mip[0,...], i, lambda_fn) 150 | else: 151 | _save_mip2D(fn, tex.data[0,...], None, lambda_fn) 152 | -------------------------------------------------------------------------------- /nvdiffmodeling/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
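A note ahead of util.py: the length() helper below clamps dot(x, x) before the square root because the gradient of sqrt at 0 is NaN, as its inline comment says. A two-line sketch of the failure mode it guards against:

import torch

x = torch.zeros(3, requires_grad=True)
torch.sqrt((x * x).sum()).backward()  # d/du sqrt(u) = 0.5/sqrt(u) -> inf at u=0, times du/dx = 0 -> NaN
print(x.grad)                         # tensor([nan, nan, nan])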
8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import torch 13 | import nvdiffrast.torch as dr 14 | import imageio 15 | 16 | #---------------------------------------------------------------------------- 17 | # Vector operations 18 | #---------------------------------------------------------------------------- 19 | 20 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 21 | return torch.sum(x*y, -1, keepdim=True) 22 | 23 | def reflect(x: torch.Tensor, n: torch.Tensor) -> torch.Tensor: 24 | return 2*dot(x, n)*n - x 25 | 26 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 27 | return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN 28 | 29 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor: 30 | return x / length(x, eps) 31 | 32 | def to_hvec(x: torch.Tensor, w: float) -> torch.Tensor: 33 | return torch.nn.functional.pad(x, pad=(0,1), mode='constant', value=w) 34 | 35 | #---------------------------------------------------------------------------- 36 | # Tonemapping 37 | #---------------------------------------------------------------------------- 38 | 39 | def tonemap_srgb(f: torch.Tensor) -> torch.Tensor: 40 | return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f) 41 | 42 | #---------------------------------------------------------------------------- 43 | # sRGB color transforms 44 | #---------------------------------------------------------------------------- 45 | 46 | def _rgb_to_srgb(f: torch.Tensor) -> torch.Tensor: 47 | return torch.where(f <= 0.0031308, f * 12.92, torch.pow(torch.clamp(f, 0.0031308), 1.0/2.4)*1.055 - 0.055) 48 | 49 | def rgb_to_srgb(f: torch.Tensor) -> torch.Tensor: 50 | assert f.shape[-1] == 3 or f.shape[-1] == 4 51 | out = torch.cat((_rgb_to_srgb(f[..., 0:3]), f[..., 3:4]), dim=-1) if f.shape[-1] == 4 else _rgb_to_srgb(f) 52 | assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2] 53 | return out 54 | 55 | def _srgb_to_rgb(f: torch.Tensor) -> torch.Tensor: 56 | return torch.where(f <= 0.04045, f / 12.92, torch.pow((torch.clamp(f, 0.04045) + 0.055) / 1.055, 2.4)) 57 | 58 | def srgb_to_rgb(f: torch.Tensor) -> torch.Tensor: 59 | assert f.shape[-1] == 3 or f.shape[-1] == 4 60 | out = torch.cat((_srgb_to_rgb(f[..., 0:3]), f[..., 3:4]), dim=-1) if f.shape[-1] == 4 else _srgb_to_rgb(f) 61 | assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2] 62 | return out 63 | 64 | #---------------------------------------------------------------------------- 65 | # Displacement texture lookup 66 | #---------------------------------------------------------------------------- 67 | 68 | def get_miplevels(texture: np.ndarray) -> float: 69 | minDim = min(texture.shape[0], texture.shape[1]) 70 | return np.floor(np.log2(minDim)) 71 | 72 | # TODO: Handle wrapping maybe 73 | def tex_2d(tex_map : torch.Tensor, coords : torch.Tensor, filter='nearest') -> torch.Tensor: 74 | tex_map = tex_map[None, ...] # Add batch dimension 75 | tex_map = tex_map.permute(0, 3, 1, 2) # NHWC -> NCHW 76 | tex = torch.nn.functional.grid_sample(tex_map, coords[None, None, ...] * 2 - 1, mode=filter, align_corners=False) 77 | tex = tex.permute(0, 2, 3, 1) # NCHW -> NHWC 78 | return tex[0, 0, ...] 
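The sRGB transforms above are exact inverses on [0, 1] (the 0.04045 decode threshold is 12.92 × 0.0031308); a standalone round-trip check, restating the same formulas so the snippet runs on its own:

import torch

def rgb_to_srgb(f):
    # Linear -> sRGB: linear segment near zero, gamma curve elsewhere.
    return torch.where(f <= 0.0031308, f * 12.92,
                       torch.pow(torch.clamp(f, 0.0031308), 1.0 / 2.4) * 1.055 - 0.055)

def srgb_to_rgb(f):
    # sRGB -> linear: inverse of the above, branch for branch.
    return torch.where(f <= 0.04045, f / 12.92,
                       torch.pow((torch.clamp(f, 0.04045) + 0.055) / 1.055, 2.4))

x = torch.rand(4, 4, 3)
assert torch.allclose(srgb_to_rgb(rgb_to_srgb(x)), x, atol=1e-5)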
79 | 
80 | #----------------------------------------------------------------------------
81 | # Image scaling
82 | #----------------------------------------------------------------------------
83 | 
84 | def scale_img_hwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
85 |     return scale_img_nhwc(x[None, ...], size, mag, min)[0]
86 | 
87 | def scale_img_nhwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
88 |     assert (x.shape[1] >= size[0] and x.shape[2] >= size[1]) or (x.shape[1] < size[0] and x.shape[2] < size[1]), "Trying to magnify image in one dimension and minify in the other"
89 |     y = x.permute(0, 3, 1, 2) # NHWC -> NCHW
90 |     if x.shape[1] > size[0] and x.shape[2] > size[1]: # Minification, previous size was bigger
91 |         y = torch.nn.functional.interpolate(y, size, mode=min)
92 |     else: # Magnification
93 |         if mag == 'bilinear' or mag == 'bicubic':
94 |             y = torch.nn.functional.interpolate(y, size, mode=mag, align_corners=True)
95 |         else:
96 |             y = torch.nn.functional.interpolate(y, size, mode=mag)
97 |     return y.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC
98 | 
99 | def avg_pool_nhwc(x : torch.Tensor, size) -> torch.Tensor:
100 |     y = x.permute(0, 3, 1, 2) # NHWC -> NCHW
101 |     y = torch.nn.functional.avg_pool2d(y, size)
102 |     return y.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC
103 | 
104 | #----------------------------------------------------------------------------
105 | # Behaves similarly to tf.segment_sum
106 | #----------------------------------------------------------------------------
107 | 
108 | def segment_sum(data: torch.Tensor, segment_ids: torch.Tensor) -> torch.Tensor:
109 |     num_segments = torch.unique_consecutive(segment_ids).shape[0]
110 | 
111 |     # Repeats ids until same dimension as data
112 |     if len(segment_ids.shape) == 1:
113 |         s = torch.prod(torch.tensor(data.shape[1:], dtype=torch.int64, device='cuda')).long()
114 |         segment_ids = segment_ids.repeat_interleave(s).view(segment_ids.shape[0], *data.shape[1:])
115 | 
116 |     assert data.shape == segment_ids.shape, "data.shape and segment_ids.shape should be equal"
117 | 
118 |     shape = [num_segments] + list(data.shape[1:])
119 |     result = torch.zeros(*shape, dtype=torch.float32, device='cuda')
120 |     result = result.scatter_add(0, segment_ids, data)
121 |     return result
122 | 
123 | #----------------------------------------------------------------------------
124 | # Projection and transformation matrix helpers.
125 | #---------------------------------------------------------------------------- 126 | 127 | def projection(x=0.1, n=1.0, f=50.0): 128 | return np.array([[n/x, 0, 0, 0], 129 | [ 0, n/-x, 0, 0], 130 | [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 131 | [ 0, 0, -1, 0]]).astype(np.float32) 132 | 133 | def translate(x, y, z): 134 | return np.array([[1, 0, 0, x], 135 | [0, 1, 0, y], 136 | [0, 0, 1, z], 137 | [0, 0, 0, 1]]).astype(np.float32) 138 | 139 | def rotate_x(a): 140 | s, c = np.sin(a), np.cos(a) 141 | return np.array([[1, 0, 0, 0], 142 | [0, c, s, 0], 143 | [0, -s, c, 0], 144 | [0, 0, 0, 1]]).astype(np.float32) 145 | 146 | def rotate_y(a): 147 | s, c = np.sin(a), np.cos(a) 148 | return np.array([[ c, 0, s, 0], 149 | [ 0, 1, 0, 0], 150 | [-s, 0, c, 0], 151 | [ 0, 0, 0, 1]]).astype(np.float32) 152 | 153 | def scale(s): 154 | return np.array([[ s, 0, 0, 0], 155 | [ 0, s, 0, 0], 156 | [ 0, 0, s, 0], 157 | [ 0, 0, 0, 1]]).astype(np.float32) 158 | 159 | def lookAt(eye, at, up): 160 | a = eye - at 161 | b = up 162 | w = a / np.linalg.norm(a) 163 | u = np.cross(b, w) 164 | u = u / np.linalg.norm(u) 165 | v = np.cross(w, u) 166 | translate = np.array([[1, 0, 0, -eye[0]], 167 | [0, 1, 0, -eye[1]], 168 | [0, 0, 1, -eye[2]], 169 | [0, 0, 0, 1]]).astype(np.float32) 170 | rotate = np.array([[u[0], u[1], u[2], 0], 171 | [v[0], v[1], v[2], 0], 172 | [w[0], w[1], w[2], 0], 173 | [0, 0, 0, 1]]).astype(np.float32) 174 | return np.matmul(rotate, translate) 175 | 176 | def random_rotation_translation(t): 177 | m = np.random.normal(size=[3, 3]) 178 | m[1] = np.cross(m[0], m[2]) 179 | m[2] = np.cross(m[0], m[1]) 180 | m = m / np.linalg.norm(m, axis=1, keepdims=True) 181 | m = np.pad(m, [[0, 1], [0, 1]], mode='constant') 182 | m[3, 3] = 1.0 183 | m[:3, 3] = np.random.uniform(-t, t, size=[3]) 184 | return m 185 | 186 | 187 | #---------------------------------------------------------------------------- 188 | # Cosine sample around a vector N 189 | #---------------------------------------------------------------------------- 190 | def cosine_sample(N : np.ndarray) -> np.ndarray: 191 | # construct local frame 192 | N = N/np.linalg.norm(N) 193 | 194 | dx0 = np.array([0, N[2], -N[1]]) 195 | dx1 = np.array([-N[2], 0, N[0]]) 196 | 197 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 198 | dx = dx/np.linalg.norm(dx) 199 | dy = np.cross(N,dx) 200 | dy = dy/np.linalg.norm(dy) 201 | 202 | # cosine sampling in local frame 203 | phi = 2.0*np.pi*np.random.uniform() 204 | s = np.random.uniform() 205 | costheta = np.sqrt(s) 206 | sintheta = np.sqrt(1.0 - s) 207 | 208 | # cartesian vector in local space 209 | x = np.cos(phi)*sintheta 210 | y = np.sin(phi)*sintheta 211 | z = costheta 212 | 213 | # local to world 214 | return dx*x + dy*y + N*z 215 | 216 | 217 | #---------------------------------------------------------------------------- 218 | # Cosine sampled light directions around the vector N 219 | #---------------------------------------------------------------------------- 220 | def cosine_sample_texture(res, N : np.ndarray) -> torch.Tensor: 221 | # construct local frame 222 | N = N/np.linalg.norm(N) 223 | 224 | dx0 = np.array([0, N[2], -N[1]]) 225 | dx1 = np.array([-N[2], 0, N[0]]) 226 | 227 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 228 | dx = dx/np.linalg.norm(dx) 229 | dy = np.cross(N,dx) 230 | dy = dy/np.linalg.norm(dy) 231 | 232 | X = torch.tensor(dx, dtype=torch.float32, device='cuda') 233 | Y = torch.tensor(dy, dtype=torch.float32, device='cuda') 234 | Z = torch.tensor(N, dtype=torch.float32, 
device='cuda') 235 | 236 | # cosine sampling in local frame 237 | 238 | phi = 2.0*np.pi*torch.rand(res, res, 1, dtype=torch.float32, device='cuda') 239 | s = torch.rand(res, res, 1, dtype=torch.float32, device='cuda') 240 | costheta = torch.sqrt(s) 241 | sintheta = torch.sqrt(1.0 - s) 242 | 243 | # cartesian vector in local space 244 | x = torch.cos(phi)*sintheta 245 | y = torch.sin(phi)*sintheta 246 | z = costheta 247 | 248 | # local to world 249 | return X*x + Y*y + Z*z 250 | 251 | #---------------------------------------------------------------------------- 252 | # Bilinear downsample by 2x. 253 | #---------------------------------------------------------------------------- 254 | 255 | def bilinear_downsample(x : torch.tensor) -> torch.Tensor: 256 | w = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0 257 | w = w.expand(x.shape[-1], 1, 4, 4) 258 | x = torch.nn.functional.conv2d(x.permute(0, 3, 1, 2), w, padding=1, stride=2, groups=x.shape[-1]) 259 | return x.permute(0, 2, 3, 1) 260 | 261 | #---------------------------------------------------------------------------- 262 | # Bilinear downsample log(spp) steps 263 | #---------------------------------------------------------------------------- 264 | 265 | def bilinear_downsample(x : torch.tensor, spp) -> torch.Tensor: 266 | w = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0 267 | g = x.shape[-1] 268 | w = w.expand(g, 1, 4, 4) 269 | x = x.permute(0, 3, 1, 2) # NHWC -> NCHW 270 | steps = int(np.log2(spp)) 271 | for _ in range(steps): 272 | xp = torch.nn.functional.pad(x, (1,1,1,1), mode='replicate') 273 | x = torch.nn.functional.conv2d(xp, w, padding=0, stride=2, groups=g) 274 | return x.permute(0, 2, 3, 1).contiguous() # NCHW -> NHWC 275 | 276 | 277 | #---------------------------------------------------------------------------- 278 | # Image display function using OpenGL. 279 | #---------------------------------------------------------------------------- 280 | 281 | _glfw_window = None 282 | def display_image(image, zoom=None, size=None, title=None): # HWC 283 | # Import OpenGL and glfw. 284 | import OpenGL.GL as gl 285 | import glfw 286 | 287 | # Zoom image if requested. 288 | image = np.asarray(image) 289 | if size is not None: 290 | assert zoom is None 291 | zoom = max(1, size // image.shape[0]) 292 | if zoom is not None: 293 | image = image.repeat(zoom, axis=0).repeat(zoom, axis=1) 294 | height, width, channels = image.shape 295 | 296 | # Initialize window. 297 | if title is None: 298 | title = 'Debug window' 299 | global _glfw_window 300 | if _glfw_window is None: 301 | glfw.init() 302 | _glfw_window = glfw.create_window(width, height, title, None, None) 303 | glfw.make_context_current(_glfw_window) 304 | glfw.show_window(_glfw_window) 305 | glfw.swap_interval(0) 306 | else: 307 | glfw.make_context_current(_glfw_window) 308 | glfw.set_window_title(_glfw_window, title) 309 | glfw.set_window_size(_glfw_window, width, height) 310 | 311 | # Update window. 
312 |     glfw.poll_events()
313 |     gl.glClearColor(0, 0, 0, 1)
314 |     gl.glClear(gl.GL_COLOR_BUFFER_BIT)
315 |     gl.glWindowPos2f(0, 0)
316 |     gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
317 |     gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
318 |     gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
319 |     gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])
320 |     glfw.swap_buffers(_glfw_window)
321 |     if glfw.window_should_close(_glfw_window):
322 |         return False
323 |     return True
324 | 
325 | #----------------------------------------------------------------------------
326 | # Image save helper.
327 | #----------------------------------------------------------------------------
328 | 
329 | def save_image(fn, x : np.ndarray) -> None:
330 |     imageio.imwrite(fn, np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8))
331 | 
332 | def load_image(fn) -> np.ndarray:
333 |     img = imageio.imread(fn)
334 |     if img.dtype == np.float32: # HDR image
335 |         return img
336 |     else: # LDR image
337 |         return img.astype(np.float32) / 255
338 | 
339 | #----------------------------------------------------------------------------
340 | 
341 | def time_to_text(x):
342 |     if x > 3600:
343 |         return "%.2f h" % (x / 3600)
344 |     elif x > 60:
345 |         return "%.2f m" % (x / 60)
346 |     else:
347 |         return "%.2f s" % x
348 | 
349 | #----------------------------------------------------------------------------
350 | 
351 | def checkerboard(width, repetitions) -> np.ndarray:
352 |     tilesize = int(width//repetitions//2)
353 |     check = np.kron([[1, 0] * repetitions, [0, 1] * repetitions] * repetitions, np.ones((tilesize, tilesize)))*0.33 + 0.33
354 |     return np.stack((check, check, check), axis=-1)[None, ...]
355 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | clip @ git+https://github.com/openai/CLIP.git@b46f5ac7587d2e1862f8b7b1573179d80dcdd620
2 | imageio
3 | cython
4 | imageio-ffmpeg
5 | kornia
6 | numpy
7 | nvdiffrast @ git+https://github.com/NVlabs/nvdiffrast.git@78528e683210dfaa1be57e3c65aa37d3b36c6644
8 | Pillow
9 | PyGLM
10 | resize-right
11 | scipy
12 | smplx
13 | tqdm
14 | Ninja
15 | pyyaml
16 | matplotlib>=3.3.0
17 | trimesh
18 | tensorboard
19 | easydict
20 | cholespy
21 | fire
22 | torch-scatter
23 | torch-sparse
24 | -f https://data.pyg.org/whl/torch-1.13.1+cu117.html
25 | pymeshlab
--------------------------------------------------------------------------------
/utilities/camera.py:
--------------------------------------------------------------------------------
1 | import glm
2 | import torch
3 | import random
4 | 
5 | import numpy as np
6 | import torchvision.transforms as transforms
7 | 
8 | from .resize_right import resize
9 | 
10 | blurs = [
11 |     transforms.Compose([
12 |         transforms.GaussianBlur(11, sigma=(5, 5))
13 |     ]),
14 |     transforms.Compose([
15 |         transforms.GaussianBlur(11, sigma=(2, 2))
16 |     ]),
17 |     transforms.Compose([
18 |         transforms.GaussianBlur(5, sigma=(5, 5))
19 |     ]),
20 |     transforms.Compose([
21 |         transforms.GaussianBlur(5, sigma=(2, 2))
22 |     ]),
23 | ]
24 | 
25 | def get_random_bg(h, w, rand_solid=False):
26 | 
27 |     p = torch.rand(1)
28 | 
29 |     if p > 0.66666:
30 |         if rand_solid:
31 |             background = torch.vstack([
32 |                 torch.full( (1, h, w), torch.rand(1).item()),
33 |                 torch.full( (1, h, w), torch.rand(1).item()),
34 |                 torch.full( (1, h, w), torch.rand(1).item()),
35 |             ]).unsqueeze(0) + torch.rand(1, 3, h, w)
36 |
background = ((background - background.amin()) / (background.amax() - background.amin())) 37 | background = blurs[random.randint(0, 3)](background).permute(0, 2, 3, 1) 38 | else: 39 | background = blurs[random.randint(0, 3)]( torch.rand((1, 3, h, w)) ).permute(0, 2, 3, 1) 40 | elif p > 0.333333: 41 | size = random.randint(5, 10) 42 | background = torch.vstack([ 43 | torch.full( (1, size, size), torch.rand(1).item() / 2), 44 | torch.full( (1, size, size), torch.rand(1).item() / 2 ), 45 | torch.full( (1, size, size), torch.rand(1).item() / 2 ), 46 | ]).unsqueeze(0) 47 | 48 | second = torch.rand(3) 49 | 50 | background[:, 0, ::2, ::2] = second[0] 51 | background[:, 1, ::2, ::2] = second[1] 52 | background[:, 2, ::2, ::2] = second[2] 53 | 54 | background[:, 0, 1::2, 1::2] = second[0] 55 | background[:, 1, 1::2, 1::2] = second[1] 56 | background[:, 2, 1::2, 1::2] = second[2] 57 | 58 | background = blurs[random.randint(0, 3)]( resize(background, out_shape=(h, w)) ) 59 | 60 | background = background.permute(0, 2, 3, 1) 61 | 62 | else: 63 | background = torch.vstack([ 64 | torch.full( (1, h, w), torch.rand(1).item()), 65 | torch.full( (1, h, w), torch.rand(1).item()), 66 | torch.full( (1, h, w), torch.rand(1).item()), 67 | ]).unsqueeze(0).permute(0, 2, 3, 1) 68 | 69 | return background 70 | 71 | def cosine_sample(N : np.ndarray) -> np.ndarray: 72 | """ 73 | #---------------------------------------------------------------------------- 74 | # Cosine sample around a vector N 75 | #---------------------------------------------------------------------------- 76 | 77 | Copied from nvdiffmodelling 78 | 79 | """ 80 | # construct local frame 81 | N = N/np.linalg.norm(N) 82 | 83 | dx0 = np.array([0, N[2], -N[1]]) 84 | dx1 = np.array([-N[2], 0, N[0]]) 85 | 86 | dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1 87 | dx = dx/np.linalg.norm(dx) 88 | dy = np.cross(N,dx) 89 | dy = dy/np.linalg.norm(dy) 90 | 91 | # cosine sampling in local frame 92 | phi = 2.0*np.pi*np.random.uniform() 93 | s = np.random.uniform() 94 | costheta = np.sqrt(s) 95 | sintheta = np.sqrt(1.0 - s) 96 | 97 | # cartesian vector in local space 98 | x = np.cos(phi)*sintheta 99 | y = np.sin(phi)*sintheta 100 | z = costheta 101 | 102 | # local to world 103 | return dx*x + dy*y + N*z 104 | 105 | def persp_proj(fov_x=45, ar=1, near=1.0, far=50.0): 106 | """ 107 | From https://github.com/rgl-epfl/large-steps-pytorch by @bathal1 (Baptiste Nicolet) 108 | 109 | Build a perspective projection matrix. 110 | Parameters 111 | ---------- 112 | fov_x : float 113 | Horizontal field of view (in degrees). 114 | ar : float 115 | Aspect ratio (w/h). 116 | near : float 117 | Depth of the near plane relative to the camera. 118 | far : float 119 | Depth of the far plane relative to the camera. 
120 | """ 121 | fov_rad = np.deg2rad(fov_x) 122 | 123 | tanhalffov = np.tan( (fov_rad / 2) ) 124 | max_y = tanhalffov * near 125 | min_y = -max_y 126 | max_x = max_y * ar 127 | min_x = -max_x 128 | 129 | z_sign = -1.0 130 | proj_mat = np.array([[0, 0, 0, 0], 131 | [0, 0, 0, 0], 132 | [0, 0, 0, 0], 133 | [0, 0, 0, 0]]) 134 | 135 | proj_mat[0, 0] = 2.0 * near / (max_x - min_x) 136 | proj_mat[1, 1] = 2.0 * near / (max_y - min_y) 137 | proj_mat[0, 2] = (max_x + min_x) / (max_x - min_x) 138 | proj_mat[1, 2] = (max_y + min_y) / (max_y - min_y) 139 | proj_mat[3, 2] = z_sign 140 | 141 | proj_mat[2, 2] = z_sign * far / (far - near) 142 | proj_mat[2, 3] = -(far * near) / (far - near) 143 | 144 | return proj_mat 145 | 146 | def get_camera_params(elev_angle, azim_angle, distance, resolution, fov=60, look_at=[0, 0, 0], up=[0, -1, 0]): 147 | 148 | elev = np.radians( elev_angle ) 149 | azim = np.radians( azim_angle ) 150 | 151 | # Generate random view 152 | cam_z = distance * np.cos(elev) * np.sin(azim) 153 | cam_y = distance * np.sin(elev) 154 | cam_x = distance * np.cos(elev) * np.cos(azim) 155 | 156 | modl = glm.mat4() 157 | view = glm.lookAt( 158 | glm.vec3(cam_x, cam_y, cam_z), 159 | glm.vec3(look_at[0], look_at[1], look_at[2]), 160 | glm.vec3(up[0], up[1], up[2]), 161 | ) 162 | 163 | a_mv = view * modl 164 | a_mv = np.array(a_mv.to_list()).T 165 | proj_mtx = persp_proj(fov) 166 | 167 | a_mvp = np.matmul(proj_mtx, a_mv).astype(np.float32)[None, ...] 168 | 169 | a_lightpos = np.linalg.inv(a_mv)[None, :3, 3] 170 | a_campos = a_lightpos 171 | 172 | return { 173 | 'mvp' : a_mvp, 174 | 'lightpos' : a_lightpos, 175 | 'campos' : a_campos, 176 | 'resolution' : [resolution, resolution], 177 | } 178 | 179 | # Returns a batch of camera parameters 180 | class CameraBatch(torch.utils.data.Dataset): 181 | def __init__( 182 | self, 183 | image_resolution, 184 | distances, 185 | azimuths, 186 | elevation_params, 187 | fovs, 188 | aug_loc, 189 | aug_light, 190 | aug_bkg, 191 | bs, 192 | look_at=[0, 0, 0], up=[0, -1, 0], 193 | rand_solid=False 194 | ): 195 | 196 | self.res = image_resolution 197 | 198 | self.dist_min = distances[0] 199 | self.dist_max = distances[1] 200 | 201 | self.azim_min = azimuths[0] 202 | self.azim_max = azimuths[1] 203 | 204 | self.fov_min = fovs[0] 205 | self.fov_max = fovs[1] 206 | 207 | self.elev_alpha = elevation_params[0] 208 | self.elev_beta = elevation_params[1] 209 | self.elev_max = elevation_params[2] 210 | 211 | self.aug_loc = aug_loc 212 | self.aug_light = aug_light 213 | self.aug_bkg = aug_bkg 214 | 215 | self.look_at = look_at 216 | self.up = up 217 | 218 | self.batch_size = bs 219 | self.rand_solid = rand_solid 220 | 221 | def __len__(self): 222 | return self.batch_size 223 | 224 | def __getitem__(self, index): 225 | 226 | elev = np.radians( np.random.beta( self.elev_alpha, self.elev_beta ) * self.elev_max ) 227 | azim = np.radians( np.random.uniform( self.azim_min, self.azim_max+1.0 ) ) 228 | dist = np.random.uniform( self.dist_min, self.dist_max ) 229 | fov = np.random.uniform( self.fov_min, self.fov_max ) 230 | 231 | proj_mtx = persp_proj(fov) 232 | 233 | # Generate random view 234 | cam_z = dist * np.cos(elev) * np.sin(azim) 235 | cam_y = dist * np.sin(elev) 236 | cam_x = dist * np.cos(elev) * np.cos(azim) 237 | 238 | if self.aug_loc: 239 | 240 | # Random offset 241 | limit = self.dist_min // 2 242 | rand_x = np.random.uniform( -limit, limit ) 243 | rand_y = np.random.uniform( -limit, limit ) 244 | 245 | modl = glm.translate(glm.mat4(), glm.vec3(rand_x, rand_y, 0)) 246 | 247 
| else: 248 | 249 | modl = glm.mat4() 250 | 251 | view = glm.lookAt( 252 | glm.vec3(cam_x, cam_y, cam_z), 253 | glm.vec3(self.look_at[0], self.look_at[1], self.look_at[2]), 254 | glm.vec3(self.up[0], self.up[1], self.up[2]), 255 | ) 256 | 257 | r_mv = view * modl 258 | r_mv = np.array(r_mv.to_list()).T 259 | 260 | mvp = np.matmul(proj_mtx, r_mv).astype(np.float32) 261 | campos = np.linalg.inv(r_mv)[:3, 3] 262 | 263 | if self.aug_light: 264 | lightpos = cosine_sample(campos)*dist 265 | else: 266 | lightpos = campos*dist 267 | 268 | if self.aug_bkg: 269 | bkgs = get_random_bg(self.res, self.res, self.rand_solid).squeeze(0) 270 | else: 271 | bkgs = torch.ones(self.res, self.res, 3) 272 | 273 | return { 274 | 'mvp': torch.from_numpy( mvp ).float(), 275 | 'lightpos': torch.from_numpy( lightpos ).float(), 276 | 'campos': torch.from_numpy( campos ).float(), 277 | 'bkgs': bkgs, 278 | 'azim': torch.tensor(azim).float(), 279 | 'elev': torch.tensor(elev).float(), 280 | } 281 | 282 | class ListCameraBatch(torch.utils.data.Dataset): 283 | def __init__(self, datasets, bs, weights=None): 284 | self.datasets = datasets 285 | self.batch_size = bs 286 | self.weights = weights 287 | def __len__(self): 288 | return self.batch_size 289 | def __getitem__(self, index): 290 | d = random.choices(self.datasets, weights=self.weights)[0] 291 | return d[index] -------------------------------------------------------------------------------- /utilities/clip_spatial.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import math 3 | import types 4 | import typing 5 | 6 | import clip 7 | import torch 8 | import torch.nn as nn 9 | from torchvision import models, transforms 10 | 11 | # code lifted from CLIPasso 12 | 13 | # For ViT 14 | class CLIPVisualEncoder(nn.Module): 15 | def __init__(self, model_name, stride, device): 16 | super().__init__() 17 | self.load_model(model_name, device) 18 | self.old_stride = self.model.conv1.stride[0] 19 | self.new_stride = stride 20 | self.patch_vit_resolution(stride) 21 | 22 | for i in range(12): # 12 resblocks in VIT visual transformer 23 | self.model.transformer.resblocks[i].register_forward_hook( 24 | self.make_hook(i)) 25 | 26 | 27 | def load_model(self, model_name, device): 28 | model, preprocess = clip.load(model_name, device=device) 29 | self.model = model.visual 30 | self.mean = torch.tensor(preprocess.transforms[-1].mean, device=device) 31 | self.std = torch.tensor(preprocess.transforms[-1].std, device=device) 32 | 33 | @staticmethod 34 | def _fix_pos_enc(patch_size: int, stride_hw: typing.Tuple[int, int]): 35 | def interpolate_pos_encoding(self, x, w, h): 36 | npatch = x.shape[1] - 1 37 | N = self.positional_embedding.shape[0] - 1 38 | if npatch == N and w == h: 39 | return self.positional_embedding 40 | class_pos_embed = self.positional_embedding[:1].type(x.dtype) 41 | patch_pos_embed = self.positional_embedding[1:].type(x.dtype) 42 | dim = x.shape[-1] 43 | w0 = 1 + (w - patch_size) // stride_hw[1] 44 | h0 = 1 + (h - patch_size) // stride_hw[0] 45 | assert (w0 * h0 == npatch) 46 | w0, h0 = w0 + 0.1, h0 + 0.1 47 | patch_pos_embed = torch.nn.functional.interpolate( 48 | patch_pos_embed.reshape(int(math.sqrt(N)), int(math.sqrt(N)), dim).permute(2, 0, 1).unsqueeze(0), 49 | scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)), 50 | mode='bicubic', 51 | align_corners=False, recompute_scale_factor=False, 52 | ).squeeze() 53 | assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1] 54 | 
patch_pos_embed = patch_pos_embed.permute(1, 2, 0).view(1, -1, dim)
55 |             return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)
56 |         return interpolate_pos_encoding
57 | 
58 | 
59 |     def patch_vit_resolution(self, stride):
60 |         patch_size = self.model.conv1.stride[0]
61 |         if stride == patch_size:
62 |             return
63 | 
64 |         stride = (stride, stride)
65 |         assert all([(patch_size // s_) * s_ == patch_size for s_ in stride])
66 |         self.model.conv1.stride = stride
67 |         self.model.interpolate_pos_encoding = types.MethodType(CLIPVisualEncoder._fix_pos_enc(patch_size, stride), self.model)
68 | 
69 |     @property
70 |     def dtype(self):
71 |         return self.model.conv1.weight.dtype
72 | 
73 |     def make_hook(self, name):
74 |         def hook(module, input, output):
75 |             if len(output.shape) == 3:
76 |                 self.featuremaps[name] = output.permute(
77 |                     1, 0, 2) # LND -> NLD bs, smth, 768
78 |             else:
79 |                 self.featuremaps[name] = output
80 | 
81 |         return hook
82 | 
83 |     def forward(self, x, preprocess=False):
84 |         self.featuremaps = collections.OrderedDict()
85 |         if preprocess:
86 |             x = (x - self.mean[None, :, None, None]) / self.std[None, :, None, None]
87 |         B, C, W, H = x.shape
88 |         x = self.model.conv1(x.type(self.dtype)) # shape = [*, width, grid, grid]
89 |         x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2]
90 |         x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width]
91 |         x = torch.cat([self.model.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width]
92 |         x = x + self.model.interpolate_pos_encoding(x, W, H)
93 |         x = self.model.ln_pre(x)
94 |         x = x.permute(1, 0, 2)
95 |         x = self.model.transformer(x)
96 |         # remove cls
97 |         featuremaps = [self.featuremaps[k].permute(0, 2, 1)[..., 1:] for k in range(12)]
98 | 
99 |         return featuremaps
100 | 
--------------------------------------------------------------------------------
/utilities/helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | Various helper functions
3 | 
4 | create_scene -> combines multiple nvdiffmodeling meshes into a single mesh with a mega texture
5 | """
6 | import sys
7 | import numpy as np
8 | import torch
9 | 
10 | from math import ceil
11 | 
12 | sys.path.append("../nvdiffmodeling")
13 | 
14 | import nvdiffmodeling.src.mesh as mesh
15 | import nvdiffmodeling.src.texture as texture
16 | import nvdiffmodeling.src.renderutils as ru
17 | 
18 | cosine_sim = torch.nn.CosineSimilarity()
19 | 
20 | def cosine_sum(features, targets):
21 |     return -cosine_sim(features, targets).sum()
22 | 
23 | def cosine_avg(features, targets):
24 |     return -cosine_sim(features, targets).mean()
25 | 
26 | def _merge_attr_idx(a, b, a_idx, b_idx, scale_a=1.0, scale_b=1.0, add_a=0.0, add_b=0.0):
27 |     if a is None and b is None:
28 |         return None, None
29 |     elif a is not None and b is None:
30 |         return (a*scale_a)+add_a, a_idx
31 |     elif a is None and b is not None:
32 |         return (b*scale_b)+add_b, b_idx
33 |     else:
34 |         return torch.cat(((a*scale_a)+add_a, (b*scale_b)+add_b), dim=0), torch.cat((a_idx, b_idx + a.shape[0]), dim=0)
35 | 
36 | def create_scene(meshes, sz=1024):
37 | 
38 |     # Need to comment and fix code
39 | 
40 |     scene = mesh.Mesh()
41 | 
42 |     tot = len(meshes) if len(meshes) % 2 == 0 else len(meshes)+1
43 | 
44 |     nx = 2
45 |     ny = ceil(tot / 2) if ceil(tot / 2) % 2 == 0 else ceil(tot / 2) + 1
46 | 
47 |     w = int(sz*ny)
48 |     h = int(sz*nx)
49 | 
50 |     dev = meshes[0].v_pos.device
51 | 
52 |     kd_atlas = torch.ones ( (1, w, h, 4)
).to(dev) 53 | ks_atlas = torch.zeros( (1, w, h, 3) ).to(dev) 54 | kn_atlas = torch.ones ( (1, w, h, 3) ).to(dev) 55 | 56 | for i, m in enumerate(meshes): 57 | v_pos, t_pos_idx = _merge_attr_idx(scene.v_pos, m.v_pos, scene.t_pos_idx, m.t_pos_idx) 58 | v_nrm, t_nrm_idx = _merge_attr_idx(scene.v_nrm, m.v_nrm, scene.t_nrm_idx, m.t_nrm_idx) 59 | v_tng, t_tng_idx = _merge_attr_idx(scene.v_tng, m.v_tng, scene.t_tng_idx, m.t_tng_idx) 60 | 61 | pos_x = i % nx 62 | pos_y = int(i / ny) 63 | 64 | sc_x = 1./nx 65 | sc_y = 1./ny 66 | 67 | v_tex, t_tex_idx = _merge_attr_idx( 68 | scene.v_tex, 69 | m.v_tex, 70 | scene.t_tex_idx, 71 | m.t_tex_idx, 72 | scale_a=1., 73 | scale_b=torch.tensor([sc_x, sc_y]).to(dev), 74 | add_a=0., 75 | add_b=torch.tensor([sc_x*pos_x, sc_y*pos_y]).to(dev) 76 | ) 77 | 78 | kd_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['kd'].data.shape[-1]] = m.material['kd'].data 79 | ks_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['ks'].data.shape[-1]] = m.material['ks'].data 80 | kn_atlas[:, pos_y*sz:(pos_y*sz)+sz, pos_x*sz:(pos_x*sz)+sz, :m.material['normal'].data.shape[-1]] = m.material['normal'].data 81 | 82 | scene = mesh.Mesh( 83 | v_pos=v_pos, 84 | t_pos_idx=t_pos_idx, 85 | v_nrm=v_nrm, 86 | t_nrm_idx=t_nrm_idx, 87 | v_tng=v_tng, 88 | t_tng_idx=t_tng_idx, 89 | v_tex=v_tex, 90 | t_tex_idx=t_tex_idx, 91 | base=scene 92 | ) 93 | 94 | scene = mesh.Mesh( 95 | material={ 96 | 'bsdf': 'diffuse', 97 | 'kd': texture.Texture2D( 98 | kd_atlas 99 | ), 100 | 'ks': texture.Texture2D( 101 | ks_atlas 102 | ), 103 | 'normal': texture.Texture2D( 104 | kn_atlas 105 | ), 106 | }, 107 | base=scene # gets uvs etc from here 108 | ) 109 | 110 | return scene 111 | 112 | def get_vp_map(v_pos, mtx_in, resolution): 113 | device = v_pos.device 114 | with torch.no_grad(): 115 | vp_mtx = torch.tensor([ 116 | [resolution / 2, 0., 0., (resolution - 1) / 2], 117 | [0., resolution / 2, 0., (resolution - 1) / 2], 118 | [0., 0., 1., 0.], 119 | [0., 0., 0., 1.,] 120 | ], device=device) 121 | 122 | v_pos_clip = ru.xfm_points(v_pos[None, ...], mtx_in) 123 | v_pos_div = v_pos_clip / v_pos_clip[..., -1:] 124 | 125 | v_vp = (vp_mtx @ v_pos_div.transpose(1, 2)).transpose(1, 2)[..., :-1] 126 | 127 | # don't need manual z-buffer here since we're using the rast map to do occlusion 128 | if False: 129 | v_pix = v_vp[..., :-1].int().cpu().numpy() 130 | v_depth = v_vp[..., -1].cpu().numpy() 131 | 132 | # pix_v_map = -torch.ones(len(v_pix), resolution, resolution, dtype=int) 133 | pix_v_map = -np.ones((len(v_pix), resolution, resolution), dtype=int) 134 | # v_pix_map = resolution * torch.ones(len(v_pix), len(v_pos), 2, dtype=int) 135 | v_pix_map = resolution * np.ones_like(v_pix, dtype=int) 136 | # buffer = torch.ones_like(pix_v_map) / 0 137 | buffer = -np.ones_like(pix_v_map) / 0 138 | for i, vs in enumerate(v_pix): 139 | for j, (y, x) in enumerate(vs): 140 | if x < 0 or x > resolution - 1 or y < 0 or y > resolution - 1: 141 | continue 142 | else: 143 | if v_depth[i, j] > buffer[i, x, y]: 144 | buffer[i, x, y] = v_depth[i, j] 145 | if pix_v_map[i, x, y] != -1: 146 | v_pix_map[i, pix_v_map[i, x, y]] = np.array([resolution, resolution]) 147 | pix_v_map[i, x, y] = j 148 | v_pix_map[i, j] = np.array([x, y]) 149 | v_pix_map = torch.tensor(v_pix_map, device=device) 150 | v_pix_map = v_vp[..., :-1].int().flip([-1]) 151 | v_pix_map [(v_pix_map > resolution - 1) | (v_pix_map < 0)] = resolution 152 | return v_pix_map.long() 153 | 154 | 155 | 156 | 157 | 
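# A hedged usage sketch for create_scene above; `m1` and `m2` are placeholder
# names for nvdiffmodeling meshes already loaded on the same device. Both are
# packed into one mesh whose kd/ks/normal maps share a single texture atlas,
# with each mesh's UVs rescaled into its own atlas cell:
#
#   combined = create_scene([m1, m2], sz=1024)
#   # For two meshes, nx = ny = 2, so combined.material['kd'].data is a
#   # (1, 2048, 2048, 4) atlas of 1024x1024 cells.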
-------------------------------------------------------------------------------- /utilities/video.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper class to create and add images to video 3 | """ 4 | import imageio 5 | import numpy as np 6 | 7 | class Video(): 8 | def __init__(self, path, name='video_log.mp4', mode='I', fps=30, codec='libx264', bitrate='16M') -> None: 9 | 10 | if path[-1] != "/": 11 | path += "/" 12 | 13 | self.writer = imageio.get_writer(path+name, mode=mode, fps=fps, codec=codec, bitrate=bitrate) 14 | 15 | def ready_image(self, image, write_video=True): 16 | # assuming channels last - as renderer returns it 17 | if len(image.shape) == 4: 18 | image = image.squeeze(0)[..., :3].detach().cpu().numpy() 19 | else: 20 | image = image[..., :3].detach().cpu().numpy() 21 | 22 | image = np.clip(np.rint(image*255.0), 0, 255).astype(np.uint8) 23 | 24 | if write_video: 25 | self.writer.append_data(image) 26 | 27 | return image 28 | 29 | def close(self): 30 | self.writer.close() --------------------------------------------------------------------------------
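# A minimal usage sketch for the Video helper above; 'outputs' is a placeholder
# directory that must already exist. ready_image expects channels-last frames
# in [0, 1] (as the renderer returns them), and close() flushes the mp4:
#
#   import torch
#   video = Video('outputs', name='demo.mp4', fps=30)
#   for _ in range(60):
#       frame = torch.rand(1, 256, 256, 3)   # stand-in for a rendered frame
#       video.ready_image(frame)             # clipped to uint8 and appended
#   video.close()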