├── .gitignore ├── GMeshDiffusion ├── diffusion_configs │ ├── config_lower_occgrid_normalized.py │ └── config_upper_occgrid_normalized.py ├── lib │ ├── dataset │ │ ├── gshell_dataset.py │ │ └── gshell_dataset_aug.py │ └── diffusion │ │ ├── evaler.py │ │ ├── likelihood.py │ │ ├── losses.py │ │ ├── models │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── functional.py │ │ ├── layers.py │ │ ├── normalization.py │ │ ├── unet3d_occgrid.py │ │ └── utils.py │ │ ├── sampling.py │ │ ├── sde_lib.py │ │ ├── trainer.py │ │ ├── trainer_ddp.py │ │ └── utils.py ├── main_diffusion.py ├── main_diffusion_ddp.py ├── metadata │ ├── get_splits_lower.py │ ├── get_splits_upper.py │ ├── save_tet_info.py │ └── tet_to_cubic_grid_dataset.py └── scripts │ ├── run_eval_lower_occgrid_normalized.sh │ ├── run_eval_upper_occgrid_normalized.sh │ ├── run_lower_occgrid_normalized_ddp.sh │ └── run_upper_occgrid_normalized_ddp.sh ├── README.md ├── assets ├── gshell_logo.png └── teaser.png ├── configs ├── deepfashion_mc.json ├── deepfashion_mc_256.json ├── deepfashion_mc_512.json ├── deepfashion_mc_80.json ├── nerf_chair.json ├── polycam_mc.json ├── polycam_mc_128.json └── polycam_mc_16samples.json ├── data └── tets │ └── generate_tets.py ├── dataset ├── __init__.py ├── dataset.py ├── dataset_deepfashion.py ├── dataset_deepfashion_testset.py ├── dataset_llff.py ├── dataset_mesh.py ├── dataset_nerf.py └── dataset_nerf_colmap.py ├── denoiser └── denoiser.py ├── eval_gmeshdiffusion_generated_samples.py ├── geometry ├── embedding.py ├── flexicubes_table.py ├── gshell_flexicubes.py ├── gshell_flexicubes_geometry.py ├── gshell_tets.py ├── gshell_tets_geometry.py └── mlp.py ├── render ├── light.py ├── material.py ├── mesh.py ├── mlptexture.py ├── obj.py ├── optixutils │ ├── __init__.py │ ├── c_src │ │ ├── accessor.h │ │ ├── bsdf.h │ │ ├── common.h │ │ ├── denoising.cu │ │ ├── denoising.h │ │ ├── envsampling │ │ │ ├── kernel.cu │ │ │ └── params.h │ │ ├── math_utils.h │ │ ├── optix_wrapper.cpp │ │ ├── optix_wrapper.h │ 
│ └── torch_bindings.cpp │ ├── include │ │ ├── internal │ │ │ ├── optix_7_device_impl.h │ │ │ ├── optix_7_device_impl_exception.h │ │ │ └── optix_7_device_impl_transformations.h │ │ ├── optix.h │ │ ├── optix_7_device.h │ │ ├── optix_7_host.h │ │ ├── optix_7_types.h │ │ ├── optix_denoiser_tiling.h │ │ ├── optix_device.h │ │ ├── optix_function_table.h │ │ ├── optix_function_table_definition.h │ │ ├── optix_host.h │ │ ├── optix_stack_size.h │ │ ├── optix_stubs.h │ │ └── optix_types.h │ ├── ops.py │ └── tests │ │ └── filter_test.py ├── regularizer.py ├── render.py ├── renderutils │ ├── __init__.py │ ├── bsdf.py │ ├── c_src │ │ ├── bsdf.cu │ │ ├── bsdf.h │ │ ├── common.cpp │ │ ├── common.h │ │ ├── cubemap.cu │ │ ├── cubemap.h │ │ ├── loss.cu │ │ ├── loss.h │ │ ├── mesh.cu │ │ ├── mesh.h │ │ ├── normal.cu │ │ ├── normal.h │ │ ├── tensor.h │ │ ├── torch_bindings.cpp │ │ ├── vec3f.h │ │ └── vec4f.h │ ├── loss.py │ ├── ops.py │ └── tests │ │ ├── test_bsdf.py │ │ ├── test_loss.py │ │ ├── test_mesh.py │ │ └── test_perf.py ├── texture.py └── util.py ├── train_gflexicubes_deepfashion.py ├── train_gflexicubes_polycam.py ├── train_gshelltet_deepfashion.py ├── train_gshelltet_polycam.py └── train_gshelltet_synthetic.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | # lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
def get_config():
    """Build the configuration for the lower-garment occupancy-grid diffusion model.

    Returns:
        An `ml_collections.ConfigDict` with the sub-sections `data`,
        `training`, `sampling`, `model`, `optim` and `eval`, plus the
        top-level fields `seed` and `device`.
    """
    config = ml_collections.ConfigDict()

    def _section(name):
        # Create an empty sub-config, attach it under `name`, and hand back
        # the very object that was attached so later writes land in `config`.
        section = ml_collections.ConfigDict()
        setattr(config, name, section)
        return section

    def _fill(section, **fields):
        # Keyword order is preserved, so fields are set in the order written.
        for key, value in fields.items():
            setattr(section, key, value)

    # ---- data ----
    data = _section('data')
    _fill(
        data,
        root_dir='PLACEHOLDER',
        num_workers=4,
        grid_size=128,
        tet_resolution=64,
        num_channels=4,
        use_occ_grid=True,
    )

    def _under_root(relative_path):
        # All metadata files are addressed relative to `data.root_dir`.
        return os.path.join(data.root_dir, relative_path)

    _fill(
        data,
        grid_metafile=_under_root('metadata/lower_res64_grid_train.txt'),
        occgrid_metafile=_under_root('metadata/lower_res64_occgrid_train.txt'),
        occ_mask_path=_under_root('metadata/occ_mask_res64.pt'),
        tet_info_path=_under_root('metadata/tet_info.pt'),
        filter_meta_path=None,
        aug=True,
    )

    # ---- training ----
    training = _section('training')
    _fill(
        training,
        sde='vpsde',
        continuous=False,
        reduce_mean=True,
        # For DDP, global_batch_size = nproc * local_batch_size.
        batch_size=1,
        num_grad_acc_steps=4,
        n_iters=2400001,
        snapshot_freq=1000,
        log_freq=50,
        # Produce samples at each snapshot.
        snapshot_sampling=True,
        likelihood_weighting=False,
        loss_type='l2',
        train_dir="PLACEHOLDER",
        snapshot_freq_for_preemption=1000,
        gradscaler_growth_interval=1000,
        use_aux_loss=False,
        # PyTorch 2.0, torch.compile
        compile=True,
        enable_xformers_memory_efficient_attention=True,
    )

    # ---- sampling ----
    sampling = _section('sampling')
    _fill(
        sampling,
        method='pc',
        predictor='ancestral_sampling',
        corrector='none',
        n_steps_each=1,
        noise_removal=True,
        probability_flow=False,
        snr=0.075,
    )

    # ---- model ----
    model = _section('model')
    _fill(
        model,
        name='unet3d_occgrid',
        use_occ_grid=True,
        num_res_blocks=2,
        num_res_blocks_1st_layer=2,
        base_channels=128,
        ch_mult=(1, 2, 2, 4, 4, 4),
        down_block_types=(
            "ResBlock", "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock"
        ),
        up_block_types=(
            "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock", "ResBlock"
        ),
        scale_by_sigma=False,
        num_scales=1000,
        ema_rate=0.9999,
        normalization='GroupNorm',
        act_fn='swish',
        attn_resolutions=(16,),
        resamp_with_conv=True,
        dropout=0.1,
        sigma_max=378,
        sigma_min=0.01,
        beta_min=0.1,
        beta_max=20.,
        embedding_type='fourier',
        pred_type='noise',
        conditional=True,
        feature_mask_path=_under_root('metadata/global_mask_res64.pt'),
        pixcat_mask_path=_under_root('metadata/cat_mask_res64.pt'),
    )

    # ---- optimization ----
    optim = _section('optim')
    _fill(
        optim,
        weight_decay=1e-5,
        optimizer='AdamW',
        lr=1e-5,
        beta1=0.9,
        eps=1e-8,
        warmup=5000,
        grad_clip=1.,
    )

    # ---- eval ----
    eval_config = _section('eval')
    _fill(
        eval_config,
        batch_size=2,
        idx=0,
        bin_size=30,
        eval_dir="PLACEHOLDER",
        ckpt_path="PLACEHOLDER",
    )

    config.seed = 42
    # Prefer the first CUDA device; fall back to CPU when CUDA is unavailable.
    if torch.cuda.is_available():
        config.device = torch.device('cuda:0')
    else:
        config.device = torch.device('cpu')

    return config
os.path.join(data.root_dir, 'metadata/tet_info.pt') 23 | 24 | data.filter_meta_path = None 25 | data.aug = True 26 | 27 | # training 28 | training = config.training = ml_collections.ConfigDict() 29 | training.sde = 'vpsde' 30 | training.continuous = False 31 | training.reduce_mean = True 32 | training.batch_size = 1 ### for DDP, global_batch_size = nproc * local_batch_size 33 | training.num_grad_acc_steps = 4 34 | training.n_iters = 2400001 35 | training.snapshot_freq = 1000 36 | training.log_freq = 50 37 | ## produce samples at each snapshot. 38 | training.snapshot_sampling = True 39 | training.likelihood_weighting = False 40 | training.loss_type = 'l2' 41 | training.train_dir = "PLACEHOLDER" 42 | training.snapshot_freq_for_preemption = 1000 43 | training.gradscaler_growth_interval = 1000 44 | training.use_aux_loss = False 45 | 46 | 47 | training.compile = True # PyTorch 2.0, torch.compile 48 | training.enable_xformers_memory_efficient_attention = True 49 | 50 | # sampling 51 | sampling = config.sampling = ml_collections.ConfigDict() 52 | sampling.method = 'pc' 53 | sampling.predictor = 'ancestral_sampling' 54 | sampling.corrector = 'none' 55 | sampling.n_steps_each = 1 56 | sampling.noise_removal = True 57 | sampling.probability_flow = False 58 | sampling.snr = 0.075 59 | 60 | 61 | # model 62 | model = config.model = ml_collections.ConfigDict() 63 | model.name = 'unet3d_occgrid' 64 | model.use_occ_grid = True 65 | model.num_res_blocks = 2 66 | model.num_res_blocks_1st_layer = 2 67 | model.base_channels = 128 68 | model.ch_mult = (1, 2, 2, 4, 4, 4) 69 | model.down_block_types = ( 70 | "ResBlock", "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock" 71 | ) 72 | model.up_block_types = ( 73 | "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock", "ResBlock" 74 | ) 75 | model.scale_by_sigma = False 76 | model.num_scales = 1000 77 | model.ema_rate = 0.9999 78 | model.normalization = 'GroupNorm' 79 | model.act_fn = 'swish' 80 | model.attn_resolutions 
= (16,) 81 | model.resamp_with_conv = True 82 | model.dropout = 0.1 83 | model.sigma_max = 378 84 | model.sigma_min = 0.01 85 | model.beta_min = 0.1 86 | model.beta_max = 20. 87 | model.embedding_type = 'fourier' 88 | model.pred_type = 'noise' 89 | model.conditional = True 90 | 91 | model.feature_mask_path = os.path.join(data.root_dir, 'metadata/global_mask_res64_occaug_normalized_v1.pt') 92 | model.pixcat_mask_path = os.path.join(data.root_dir, 'metadata/cat_mask_res64_occaug_normalized_v1.pt') 93 | 94 | # optimization 95 | config.optim = optim = ml_collections.ConfigDict() 96 | optim.weight_decay = 1e-5 97 | optim.optimizer = 'AdamW' 98 | optim.lr = 1e-5 99 | optim.beta1 = 0.9 100 | optim.eps = 1e-8 101 | optim.warmup = 5000 102 | optim.grad_clip = 1. 103 | 104 | # eval 105 | config.eval = eval_config = ml_collections.ConfigDict() 106 | eval_config.batch_size = 2 107 | eval_config.idx = 0 108 | eval_config.bin_size = 30 109 | eval_config.eval_dir = "PLACEHOLDER" 110 | eval_config.ckpt_path = "PLACEHOLDER" 111 | 112 | 113 | config.seed = 42 114 | config.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu') 115 | 116 | 117 | return config 118 | -------------------------------------------------------------------------------- /GMeshDiffusion/lib/dataset/gshell_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | 5 | class GShellDataset(Dataset): 6 | def __init__(self, filepath_metafile, extension='pt'): 7 | super().__init__() 8 | with open(filepath_metafile, 'r') as f: 9 | self.filepath_list = [fpath.rstrip() for fpath in f] 10 | 11 | self.extension = extension 12 | assert self.extension in ['pt', 'npy'] 13 | 14 | def __len__(self): 15 | return len(self.filepath_list) 16 | 17 | def __getitem__(self, idx): 18 | with torch.no_grad(): 19 | if self.extension == 'pt': 20 | datum = torch.load(self.filepath_list[idx], 
class GShellAugDataset(Dataset):
    """Dataset yielding a grid together with its occupancy grid.

    Two metafiles are read at construction time, one path per line:
    `FLAGS.data.grid_metafile` lists the grid files and
    `FLAGS.data.occgrid_metafile` lists the occupancy-grid files. Entry `idx`
    of one list is paired with entry `idx` of the other.

    Args:
        FLAGS: Config object exposing `data.grid_metafile`,
            `data.occgrid_metafile` and `data.num_channels`.
        extension: `'pt'` to read files with `torch.load`, or `'npy'` to read
            them with `numpy.load` (same convention as `GShellDataset`).

    Raises:
        ValueError: If `extension` is not supported, or if the occupancy
            metafile lists fewer files than the grid metafile (some index
            would otherwise fail in the middle of an epoch).
    """

    _SUPPORTED_EXTENSIONS = ('pt', 'npy')

    def __init__(self, FLAGS, extension='pt'):
        super().__init__()
        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        if extension not in self._SUPPORTED_EXTENSIONS:
            raise ValueError(
                f"Unsupported extension {extension!r}; "
                f"expected one of {self._SUPPORTED_EXTENSIONS}."
            )
        self.extension = extension

        self.filepath_list = self._read_metafile(FLAGS.data.grid_metafile)
        self.occ_filepath_list = self._read_metafile(FLAGS.data.occgrid_metafile)
        # __len__ follows the grid list, so every grid index needs a partner.
        if len(self.occ_filepath_list) < len(self.filepath_list):
            raise ValueError(
                f"Occupancy metafile lists {len(self.occ_filepath_list)} files "
                f"but grid metafile lists {len(self.filepath_list)}."
            )

        self.num_channels = FLAGS.data.num_channels
        print('num_channels: ', self.num_channels)

    @staticmethod
    def _read_metafile(path):
        """Return the non-empty, right-stripped lines of the metafile at `path`."""
        with open(path, 'r') as f:
            stripped = (line.rstrip() for line in f)
            # Blank lines (e.g. a trailing newline) are not valid file paths.
            return [entry for entry in stripped if entry]

    def _load(self, path):
        """Load one tensor from `path` according to `self.extension`."""
        if self.extension == 'pt':
            return torch.load(path, map_location='cpu')
        # Local import: only needed for the 'npy' code path.
        import numpy as np
        return torch.tensor(np.load(path))

    def __len__(self):
        return len(self.filepath_list)

    def __getitem__(self, idx):
        """Return `(grid[:num_channels], occ_grid)` for sample `idx`."""
        with torch.no_grad():
            grid = self._load(self.filepath_list[idx])
            occ_path = self.occ_filepath_list[idx]
            try:
                occ_grid = self._load(occ_path)
            except Exception:
                # Report which file failed, then let the error propagate.
                print(occ_path)
                raise
            return (grid[:self.num_channels], occ_grid)

    @staticmethod
    def collate(data):
        """Stack a list of `(grid, occ_grid)` samples into a batch dict."""
        return {
            'grid': torch.stack([sample[0] for sample in data]),
            'occgrid': torch.stack([sample[1] for sample in data]),
        }
def get_div_fn(fn):
    """Build a divergence estimator for `fn` (Hutchinson-Skilling trace estimator).

    Args:
        fn: Callable `fn(x, t)` whose divergence with respect to `x` is estimated.

    Returns:
        A function `div_fn(x, t, eps)` that returns `eps^T (d fn / d x) eps`
        summed over every dimension of `x` except the first.
    """

    def div_fn(x, t, eps):
        reduce_dims = tuple(range(1, len(x.shape)))
        with torch.enable_grad():
            # Gradients w.r.t. the input are needed even if the caller runs
            # under `torch.no_grad()`.
            x.requires_grad_(True)
            projected = torch.sum(fn(x, t) * eps)
            vjp = torch.autograd.grad(projected, x)[0]
        x.requires_grad_(False)
        return torch.sum(vjp * eps, dim=reduce_dims)

    return div_fn


def get_likelihood_fn(sde, inverse_scaler, hutchinson_type='Rademacher',
                      rtol=1e-5, atol=1e-5, method='RK45', eps=1e-5):
    """Create a function computing an unbiased log-likelihood estimate of data points.

    Args:
        sde: A `sde_lib.SDE` object that represents the forward SDE.
        inverse_scaler: The inverse data normalizer.
        hutchinson_type: "Rademacher" or "Gaussian". The type of noise used by
            the Hutchinson-Skilling trace estimator.
        rtol: A `float`. Relative tolerance of the black-box ODE solver.
        atol: A `float`. Absolute tolerance of the black-box ODE solver.
        method: A `str`. The algorithm for the black-box ODE solver; see the
            documentation of `scipy.integrate.solve_ivp`.
        eps: A `float`. The probability flow ODE is integrated from `eps`
            (rather than 0) for numerical stability.

    Returns:
        A function that takes a model and a batch of data points and returns
        the log-likelihoods in bits/dim, the latent code, and the number of
        function evaluations spent by the ODE solver.
    """

    def drift_fn(model, x, t):
        """Drift of the reverse-time SDE in its probability-flow (ODE) form."""
        score_fn = mutils.get_score_fn(sde, model, train=False, continuous=True)
        # The probability flow ODE is a special case of the reverse SDE.
        prob_flow = sde.reverse(score_fn, probability_flow=True)
        return prob_flow.sde(x, t)[0]

    def div_fn(model, x, t, noise):
        def bound_drift(xx, tt):
            return drift_fn(model, xx, tt)

        return get_div_fn(bound_drift)(x, t, noise)

    def _draw_probe(data):
        # Noise vector for the trace estimator, shaped like `data`.
        if hutchinson_type == 'Gaussian':
            return torch.randn_like(data)
        if hutchinson_type == 'Rademacher':
            return torch.randint_like(data, low=0, high=2).float() * 2 - 1.
        raise NotImplementedError(f"Hutchinson type {hutchinson_type} unknown.")

    def likelihood_fn(model, data):
        """Compute an unbiased estimate of the log-likelihood in bits/dim.

        Args:
            model: A score model.
            data: A PyTorch tensor.

        Returns:
            bpd: A PyTorch tensor of shape [batch size]. The log-likelihoods
                on `data` in bits/dim.
            z: A PyTorch tensor of the same shape as `data`. The latent
                representation of `data` under the probability flow ODE.
            nfe: An integer. The number of function evaluations used by the
                black-box ODE solver.
        """
        with torch.no_grad():
            shape = data.shape
            batch = shape[0]
            epsilon = _draw_probe(data)

            def _as_tensor(flat, target_shape):
                # Flat numpy state -> float32 tensor on the data's device.
                unflattened = mutils.from_flattened_numpy(flat, target_shape)
                return unflattened.to(data.device).type(torch.float32)

            def ode_func(t, x):
                # The solver state is [flattened sample, per-example delta log p].
                sample = _as_tensor(x[:-batch], shape)
                vec_t = torch.ones(sample.shape[0], device=sample.device) * t
                drift = mutils.to_flattened_numpy(drift_fn(model, sample, vec_t))
                logp_grad = mutils.to_flattened_numpy(div_fn(model, sample, vec_t, epsilon))
                return np.concatenate([drift, logp_grad], axis=0)

            init = np.concatenate(
                [mutils.to_flattened_numpy(data), np.zeros((batch,))], axis=0
            )
            solution = integrate.solve_ivp(
                ode_func, (eps, sde.T), init, rtol=rtol, atol=atol, method=method
            )
            nfe = solution.nfev
            final_state = solution.y[:, -1]
            z = _as_tensor(final_state[:-batch], shape)
            delta_logp = _as_tensor(final_state[-batch:], (batch,))
            prior_logp = sde.prior_logp(z)

            bpd = -(prior_logp + delta_logp) / np.log(2)
            dims_per_example = np.prod(shape[1:])
            bpd = bpd / dims_per_example
            # A hack to convert log-likelihoods to bits/dim
            offset = 7. - inverse_scaler(-1.)
            bpd = bpd + offset
            return bpd, z, nfe

    return likelihood_fn
# Partially based on: https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/training/moving_averages.py
class ExponentialMovingAverage:
    """
    Maintains (exponential) moving average of a set of parameters.

    Only parameters with `requires_grad=True` are tracked; the shadow copies
    are kept in `self.shadow_params` in the order the parameters were given.
    """

    def __init__(self, parameters, decay, use_num_updates=True):
        """
        Args:
          parameters: Iterable of `torch.nn.Parameter`; usually the result of
            `model.parameters()`.
          decay: The exponential decay.
          use_num_updates: Whether to use number of updates when computing
            averages.

        Raises:
          ValueError: If `decay` is outside `[0, 1]`.
        """
        if decay < 0.0 or decay > 1.0:
            raise ValueError('Decay must be between 0 and 1')
        self.decay = decay
        self.num_updates = 0 if use_num_updates else None
        self.shadow_params = [p.clone().detach()
                              for p in parameters if p.requires_grad]
        self.collected_params = []

    def update(self, parameters):
        """
        Update currently maintained parameters.

        Call this every time the parameters are updated, such as the result of
        the `optimizer.step()` call.

        Args:
          parameters: Iterable of `torch.nn.Parameter`; usually the same set of
            parameters used to initialize this object.
        """
        decay = self.decay
        if self.num_updates is not None:
            self.num_updates += 1
            # Warm-up: use a smaller effective decay during the first updates.
            decay = min(decay, (1 + self.num_updates) / (10 + self.num_updates))
        one_minus_decay = 1.0 - decay
        with torch.no_grad():
            trainable = [p for p in parameters if p.requires_grad]
            for s_param, param in zip(self.shadow_params, trainable):
                # shadow <- shadow - (1 - decay) * (shadow - param)
                s_param.sub_(one_minus_decay * (s_param - param))

    def copy_to(self, parameters):
        """
        Copy current parameters into given collection of parameters.

        Args:
          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
            updated with the stored moving averages.
        """
        trainable = [p for p in parameters if p.requires_grad]
        for s_param, param in zip(self.shadow_params, trainable):
            param.data.copy_(s_param.data)

    def store(self, parameters):
        """
        Save the current parameters for restoring later.

        Args:
          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
            temporarily stored.
        """
        # Detach first so the backups are plain value copies that do not stay
        # connected to the autograd graph of the live parameters.
        self.collected_params = [param.detach().clone() for param in parameters]

    def restore(self, parameters):
        """
        Restore the parameters stored with the `store` method.
        Useful to validate the model with EMA parameters without affecting the
        original optimization process. Store the parameters before the
        `copy_to` method. After validation (or model saving), use this to
        restore the former parameters.

        Args:
          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
            updated with the stored parameters.
        """
        for c_param, param in zip(self.collected_params, parameters):
            param.data.copy_(c_param.data)

    def state_dict(self):
        """Return `decay`, `num_updates` and `shadow_params` as a dict."""
        return dict(decay=self.decay, num_updates=self.num_updates,
                    shadow_params=self.shadow_params)

    def load_state_dict(self, state_dict, device='cuda'):
        """
        Load a state produced by `state_dict`.

        Args:
          state_dict: Dict with keys `decay`, `num_updates`, `shadow_params`.
          device: Device the shadow parameters are moved to.
        """
        self.decay = state_dict['decay']
        self.num_updates = state_dict['num_updates']
        # Build a fresh list so the caller's `state_dict` is left untouched.
        self.shadow_params = [s_param.to(device)
                              for s_param in state_dict['shadow_params']]
27 | """ 28 | 29 | workdir = config.training.train_dir 30 | # Create directories for experimental logs 31 | logging.info("working dir: {:s}".format(workdir)) 32 | 33 | 34 | tb_dir = os.path.join(workdir, "tensorboard") 35 | writer = tensorboard.SummaryWriter(tb_dir) 36 | 37 | # Initialize model. 38 | score_model = mutils.create_model(config) 39 | ema = ExponentialMovingAverage(score_model.parameters(), decay=config.model.ema_rate) 40 | optimizer = losses.get_optimizer(config, score_model.parameters()) 41 | gradscaler = torch.cuda.amp.GradScaler(enabled=True) 42 | 43 | state = dict(optimizer=optimizer, model=score_model, ema=ema, gradscaler=gradscaler, step=0) 44 | 45 | 46 | # Create checkpoints directory 47 | checkpoint_dir = os.path.join(workdir, "checkpoints") 48 | # Intermediate checkpoints to resume training after pre-emption in cloud environments 49 | checkpoint_meta_dir = os.path.join(workdir, "checkpoints-meta", "checkpoint.pth") 50 | os.makedirs(checkpoint_dir, exist_ok=True) 51 | os.makedirs(os.path.dirname(checkpoint_meta_dir), exist_ok=True) 52 | 53 | # Resume training when intermediate checkpoints are detected 54 | state = restore_checkpoint(checkpoint_meta_dir, state, config.device) 55 | initial_step = int(state['step']) 56 | 57 | print(f"work dir: {workdir}") 58 | 59 | 60 | try: 61 | use_occ_grid = config.data.use_occ_grid 62 | except: 63 | use_occ_grid = False 64 | if use_occ_grid: 65 | train_dataset = GShellAugDataset(config) 66 | else: 67 | train_dataset = GShellDataset(config.data.dataset_metapath) 68 | 69 | 70 | try: 71 | collate_fn = train_dataset.collate 72 | except: 73 | collate_fn = None 74 | 75 | train_loader = torch.utils.data.DataLoader( 76 | train_dataset, 77 | batch_size=config.training.batch_size, 78 | shuffle=True, 79 | num_workers=config.data.num_workers, 80 | collate_fn=collate_fn, 81 | pin_memory=True 82 | ) 83 | 84 | data_iter = iter(train_loader) 85 | 86 | print("data loader set") 87 | 88 | # Setup SDEs 89 | sde = 
sde_lib.VPSDE(beta_min=config.model.beta_min, beta_max=config.model.beta_max, N=config.model.num_scales) 90 | 91 | # Build one-step training and evaluation functions 92 | optimize_fn = losses.optimization_manager(config) 93 | try: 94 | use_vis_mask = config.model.use_vis_mask 95 | except: 96 | use_vis_mask = False 97 | print('use_vis_mask', use_vis_mask) 98 | train_step_fn = losses.get_step_fn(sde, train=True, optimize_fn=optimize_fn, 99 | loss_type=config.training.loss_type, 100 | pred_type=config.model.pred_type, 101 | use_vis_mask=use_vis_mask, 102 | use_occ=use_occ_grid, 103 | use_aux=config.training.use_aux_loss) 104 | 105 | num_train_steps = config.training.n_iters 106 | 107 | # In case there are multiple hosts (e.g., TPU pods), only log to host 0 108 | logging.info("Starting training loop at step %d." % (initial_step // config.training.num_grad_acc_steps,)) 109 | 110 | 111 | iter_size = config.training.num_grad_acc_steps 112 | for step in range(initial_step // iter_size, num_train_steps + 1): 113 | tmp_loss_dict = { 114 | 'loss_total': 0.0, 115 | 'loss_score': 0.0, 116 | 'loss_reg': 0.0, 117 | } 118 | for step_inner in range(iter_size): 119 | try: 120 | # batch, batch_mask = next(data_iter) 121 | batch = next(data_iter) 122 | except StopIteration: 123 | # StopIteration is thrown if dataset ends 124 | # reinitialize data loader 125 | data_iter = iter(train_loader) 126 | batch = next(data_iter) 127 | 128 | 129 | if type(batch) == dict: 130 | for k in batch: 131 | batch[k] = batch[k].to('cuda', non_blocking=False) 132 | else: 133 | batch = batch.to('cuda', non_blocking=False) 134 | 135 | # Execute one training step 136 | clear_grad_flag = (step_inner == 0) 137 | update_param_flag = (step_inner == iter_size - 1) 138 | loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler) 139 | for key in loss_dict: 140 | tmp_loss_dict[key] += loss_dict[key].item() / iter_size 141 | 142 | # 
print(torch.cuda.memory_summary()) 143 | 144 | if step % config.training.log_freq == 0: 145 | # logging.info("step: %d, training_loss: %.5e" % (step, tmp_loss)) 146 | logging.info( 147 | "step: %d, loss_total: %.5e, loss_score: %.5e, loss_reg: %.5e" 148 | % (step, tmp_loss_dict['loss_total'], tmp_loss_dict['loss_score'], tmp_loss_dict['loss_reg']) 149 | ) 150 | sys.stdout.flush() 151 | writer.add_scalar("loss_total", tmp_loss_dict['loss_total'], step) 152 | writer.add_scalar("loss_score", tmp_loss_dict['loss_score'], step) 153 | writer.add_scalar("loss_reg", tmp_loss_dict['loss_reg'], step) 154 | 155 | # Save a temporary checkpoint to resume training after pre-emption periodically 156 | if step != 0 and step % config.training.snapshot_freq_for_preemption == 0: 157 | logging.info(f"save meta at iter {step}") 158 | save_checkpoint(checkpoint_meta_dir, state) 159 | 160 | # Save a checkpoint periodically and generate samples if needed 161 | if step != 0 and step % config.training.snapshot_freq == 0 or step == num_train_steps: 162 | logging.info(f"save model: {step}-th") 163 | save_checkpoint(os.path.join(checkpoint_dir, f'checkpoint_{step}.pth'), state) 164 | -------------------------------------------------------------------------------- /GMeshDiffusion/lib/diffusion/trainer_ddp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | 5 | import logging 6 | # Keep the import below for registering all model definitions 7 | from .models import unet3d, unet3d_occgrid, unet3d_tet_aware, unet3d_occgrid_v2, unet3d_meshdiffusion 8 | 9 | from . import losses 10 | from .models import utils as mutils 11 | from .models.ema import ExponentialMovingAverage 12 | from . 
def train(config):
    """Run distributed (DDP) training of the diffusion score model.

    One process per GPU is expected (the repository scripts launch it with
    ``torchrun``). The working directory is read from
    ``config.training.train_dir``; when it already holds a pre-emption
    checkpoint, training resumes from that checkpoint.

    Args:
        config: Experiment configuration providing the ``model``, ``data``
            and ``training`` sections used below.
    """
    dist.init_process_group("nccl")
    rank = dist.get_rank()
    # Number of participating processes, NOT the number of visible GPUs. The
    # two differ whenever fewer processes than GPUs are launched, which would
    # make the sampler skip shards and skew the averaged loss.
    world_size = dist.get_world_size()
    # NOTE(review): the global rank doubles as the CUDA device index here and
    # in the helpers called below, which presumably only holds for
    # single-node runs -- confirm before launching on several nodes.
    torch.cuda.set_device(rank)
    print(f"Start running basic DDP example on rank {rank}.")

    workdir = config.training.train_dir
    logging.info("working dir: {:s}".format(workdir))

    # Scalars are only written by rank 0, so only rank 0 opens an event file.
    writer = None
    if rank == 0:
        writer = tensorboard.SummaryWriter(os.path.join(workdir, "tensorboard"))

    # Initialize model, EMA, optimizer and AMP gradient scaler.
    score_model = mutils.create_model(config, ddp=True, rank=rank)
    ema = ExponentialMovingAverage(score_model.parameters(), decay=config.model.ema_rate)
    optimizer = losses.get_optimizer(config, score_model.parameters())
    gradscaler = torch.cuda.amp.GradScaler(growth_interval=config.training.gradscaler_growth_interval)

    state = dict(optimizer=optimizer, model=score_model, ema=ema, gradscaler=gradscaler, step=0)

    # Regular checkpoints, plus a single rolling checkpoint used to resume
    # training after pre-emption in cloud environments.
    checkpoint_dir = os.path.join(workdir, "checkpoints")
    checkpoint_meta_dir = os.path.join(workdir, "checkpoints-meta", "checkpoint.pth")
    os.makedirs(checkpoint_dir, exist_ok=True)
    os.makedirs(os.path.dirname(checkpoint_meta_dir), exist_ok=True)

    # Resume training when an intermediate checkpoint is detected.
    state = restore_checkpoint(checkpoint_meta_dir, state, config.device, rank=rank)
    initial_step = int(state['step'])

    print(f"work dir: {workdir}")

    # Optional config entries default to False when absent.
    try:
        use_occ_grid = config.data.use_occ_grid
    except (AttributeError, KeyError):
        use_occ_grid = False
    if use_occ_grid:
        train_dataset = GShellAugDataset(config)
    else:
        train_dataset = GShellDataset(config.data.dataset_metapath)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=world_size,
        rank=rank
    )

    # Datasets may ship their own collate function; otherwise use the default.
    collate_fn = getattr(train_dataset, 'collate', None)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.training.batch_size,
        num_workers=config.data.num_workers,
        sampler=train_sampler,
        collate_fn=collate_fn
    )

    data_iter = iter(train_loader)

    print("data loader set")

    # Setup SDEs
    sde = sde_lib.VPSDE(beta_min=config.model.beta_min, beta_max=config.model.beta_max, N=config.model.num_scales)

    # Build the one-step training function
    optimize_fn = losses.optimization_manager(config)
    try:
        use_vis_mask = config.model.use_vis_mask
    except (AttributeError, KeyError):
        use_vis_mask = False
    print('use_vis_mask', use_vis_mask)
    train_step_fn = losses.get_step_fn(sde, train=True, optimize_fn=optimize_fn,
                                       loss_type=config.training.loss_type,
                                       pred_type=config.model.pred_type,
                                       use_vis_mask=use_vis_mask,
                                       use_occ=use_occ_grid,
                                       use_aux=config.training.use_aux_loss)

    num_train_steps = config.training.n_iters
    # Number of micro-batches accumulated per parameter update.
    iter_size = config.training.num_grad_acc_steps

    logging.info("Starting training loop at step %d." % (initial_step // iter_size,))

    epoch = 0
    train_sampler.set_epoch(epoch)
    for step in range(initial_step // iter_size, num_train_steps + 1):
        tmp_loss_dict = {
            'loss_total': 0.0,
            'loss_score': 0.0,
            'loss_reg': 0.0,
        }
        for step_inner in range(iter_size):
            try:
                batch = next(data_iter)
            except StopIteration:
                # The loader is exhausted: start a new epoch so the sampler
                # reshuffles, then rebuild the iterator.
                epoch += 1
                train_sampler.set_epoch(epoch)
                data_iter = iter(train_loader)
                batch = next(data_iter)

            if isinstance(batch, dict):
                for k in batch:
                    batch[k] = batch[k].to(rank, non_blocking=False)
            else:
                batch = batch.to(rank, non_blocking=False)

            # Gradients are cleared on the first micro-batch and applied on
            # the last one of each accumulation window.
            clear_grad_flag = (step_inner == 0)
            update_param_flag = (step_inner == iter_size - 1)
            if not update_param_flag:
                # Skip the DDP gradient all-reduce until the final micro-batch.
                with score_model.no_sync():
                    loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler)
            else:
                loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler)
            for key in loss_dict:
                tmp_loss_dict[key] += loss_dict[key].item() / iter_size

        if step % config.training.log_freq == 0:
            # Every rank contributes loss / world_size; the SUM reduction on
            # rank 0 therefore yields the mean over all processes.
            loss = torch.tensor(tmp_loss_dict['loss_total'] / world_size).to(rank)
            dist.reduce(loss, dst=0, op=dist.ReduceOp.SUM)
            if rank == 0:
                loss = loss.item()
                logging.info("step: %d, loss: %.5e, scale: %.5e" % (step, loss, gradscaler.get_scale()))
                sys.stdout.flush()
                writer.add_scalar("loss", loss, step)

        if rank == 0:
            # Save a temporary checkpoint to resume training after pre-emption periodically
            if step != 0 and step % config.training.snapshot_freq_for_preemption == 0:
                logging.info(f"save meta at iter {step}")
                save_checkpoint(checkpoint_meta_dir, state)

            # Save a numbered checkpoint periodically and at the final step
            if (step != 0 and step % config.training.snapshot_freq == 0) or step == num_train_steps:
                logging.info(f"save model: {step}-th")
                save_checkpoint(os.path.join(checkpoint_dir, f'checkpoint_{step}.pth'), state)

    dist.destroy_process_group()
def restore_checkpoint(ckpt_dir, state, device, strict=False, rank=None):
    """Load a training checkpoint into ``state`` in place.

    Args:
        ckpt_dir: Path of the checkpoint *file* (despite the name).
        state: Dict holding ``optimizer``, ``model``, ``ema``, ``gradscaler``
            and ``step`` entries; it is updated in place.
        device: Device the model is moved to; also forwarded to the EMA's
            ``load_state_dict``.
        strict: When True, a missing checkpoint raises instead of being
            ignored.
        rank: When given, overrides ``device`` with ``cuda:{rank}``.

    Returns:
        The same ``state`` object.

    Raises:
        FileNotFoundError: If ``strict`` is True and no checkpoint exists.
    """
    if not os.path.exists(ckpt_dir):
        parent = os.path.dirname(ckpt_dir)
        if parent:  # a bare file name has no directory to create
            os.makedirs(parent, exist_ok=True)
        if strict:
            raise FileNotFoundError(f"No checkpoint found at {ckpt_dir}.")
        logging.warning(f"No checkpoint found at {ckpt_dir}. "
                        f"Returned the same state as input")
        return state

    from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

    if rank is not None:
        device = f"cuda:{rank}"
    # Always deserialize on CPU; the model is moved to `device` afterwards.
    loaded_state = torch.load(ckpt_dir, map_location='cpu')
    state['optimizer'].load_state_dict(loaded_state['optimizer'])

    # Checkpoints written from a wrapped model carry a "module." key prefix.
    # With strict=False a key mismatch is silently ignored, so the prefix
    # must be stripped up front when the target model does not use it.
    model_weights = loaded_state['model']
    if not any(key.startswith('module.') for key in state['model'].state_dict()):
        consume_prefix_in_state_dict_if_present(model_weights, 'module.')
    state['model'].load_state_dict(model_weights, strict=False)

    state['ema'].load_state_dict(loaded_state['ema'], device=device)
    state['step'] = loaded_state['step']
    state['model'].to(device)

    # Restoring the gradient scaler is best-effort (e.g. checkpoints without
    # a 'gradscaler' entry), but the failure is reported instead of hidden.
    try:
        state['gradscaler'].load_state_dict(loaded_state['gradscaler'])
    except Exception as err:
        logging.warning(f"Could not restore gradscaler state from {ckpt_dir}: {err!r}")
    torch.cuda.empty_cache()
    return state


def save_checkpoint(ckpt_dir, state):
    """Serialize the training ``state`` to the file ``ckpt_dir``.

    Stores the state dicts of the optimizer, model, EMA and gradient scaler
    together with the current step counter.
    """
    saved_state = {
        'optimizer': state['optimizer'].state_dict(),
        'model': state['model'].state_dict(),
        'ema': state['ema'].state_dict(),
        'step': state['step'],
        'gradscaler': state['gradscaler'].state_dict()
    }
    torch.save(saved_state, ckpt_dir)
def main(argv):
    """Run the trainer or one of the samplers, as selected by ``--mode``."""
    mode = FLAGS.mode
    if mode == 'train':
        trainer.train(FLAGS.config)
        return
    if mode == 'uncond_gen':
        evaler.uncond_gen(FLAGS.config)
        return
    if mode == 'uncond_gen_interp':
        evaler.uncond_gen_interp(FLAGS.config)
        return
    if mode == 'cond_gen':
        evaler.cond_gen(FLAGS.config)
"""Write train/test path lists for the upper-garment grid dataset."""
import os
import random


def write_split_files(data_root, prefix, out_dir='.', split_ratio=0.9, seed=42):
    """Shuffle the entries of ``<data_root>/grid`` and write the split lists.

    Four text files (one path per line) are written to ``out_dir``:
    ``{prefix}_res64_grid_{train,test}.txt`` with paths under
    ``<data_root>/grid`` and ``{prefix}_res64_occgrid_{train,test}.txt`` with
    the same file names under ``<data_root>/grid_aug``.

    Args:
        data_root: Directory containing the ``grid`` and ``grid_aug`` folders.
        prefix: File-name prefix of the generated lists (e.g. ``'upper'``).
        out_dir: Directory the list files are written to.
        split_ratio: Fraction of the entries assigned to the training list.
        seed: Seed of the shuffle, so that the split is reproducible.
    """
    grid_root = os.path.join(data_root, 'grid')
    occgrid_root = os.path.join(data_root, 'grid_aug')
    # List the grid directory itself: listing `data_root` would enumerate the
    # `grid` / `grid_aug` folders rather than the data files.
    data_path_list = sorted(os.path.join(grid_root, fpath) for fpath in os.listdir(grid_root))

    # A dedicated generator seeded like the former global `random.seed(42)`.
    random.Random(seed).shuffle(data_path_list)

    n_train = int(len(data_path_list) * split_ratio)
    splits = {
        'train': data_path_list[:n_train],
        'test': data_path_list[n_train:],
    }

    for split_name, grid_paths in splits.items():
        occgrid_paths = [os.path.join(occgrid_root, os.path.basename(p)) for p in grid_paths]
        for tag, paths in (('grid', grid_paths), ('occgrid', occgrid_paths)):
            list_path = os.path.join(out_dir, f'{prefix}_res64_{tag}_{split_name}.txt')
            with open(list_path, 'w') as f:
                f.write('\n'.join(paths))


if __name__ == "__main__":
    write_split_files(data_root='PLACEHOLDER', prefix='upper')
def tet_to_grids(vertices, values_list, grid_size):
    """Scatter per-vertex values into a dense 12-channel cubic grid.

    Args:
        vertices: Integer tensor indexed as ``vertices[:, 0..2]`` giving the
            grid cell of every vertex.
        values_list: Sequence of per-vertex value tensors. The first entry is
            written to channel 0; every later entry is written, transposed,
            to channels 1..3.
        grid_size: Edge length of the cubic grid.

    Returns:
        A ``(12, grid_size, grid_size, grid_size)`` tensor on the device of
        ``vertices``; cells that are not addressed stay zero.
    """
    grid = torch.zeros(12, grid_size, grid_size, grid_size, device=vertices.device)
    x, y, z = vertices[:, 0], vertices[:, 1], vertices[:, 2]
    with torch.no_grad():
        for k, values in enumerate(values_list):
            if k == 0:
                grid[0, x, y, z] = values.squeeze()
            else:
                # NOTE(review): every entry after the first targets the same
                # channels 1..3, so later entries overwrite earlier ones.
                grid[1:4, x, y, z] = values.transpose(0, 1)
    return grid


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='nvdiffrec')
    parser.add_argument('-res', '--resolution', type=int)
    parser.add_argument('-r', '--root', type=str)
    parser.add_argument('-s', '--source', type=str)
    parser.add_argument('-t', '--target', type=str)
    FLAGS = parser.parse_args()

    tet_path = f'./tets/{FLAGS.resolution}_tets_cropped_reordered.npz'
    tet = np.load(tet_path)
    vertices = torch.tensor(tet['vertices']).cuda()
    indices = torch.tensor(tet['indices']).long().cuda()
    tet_edges = torch.tensor(tet['tet_edges']).long().view(-1, 2).cuda()

    # Discretize vertex positions on a grid with half the spacing between the
    # two smallest distinct coordinate values (i.e. a denser grid).
    vertices_unique = vertices[:].unique()
    dx = (vertices_unique[1] - vertices_unique[0]) / 2.0
    vertices_discretized = ((vertices - vertices.min()) / dx).long()

    # Discretized centre of every tetrahedron.
    tet_verts = vertices_discretized[indices.view(-1)].view(-1, 4, 3)
    tet_center_discretized = tet_verts.float().mean(dim=1).long()

    # Enumerate all pairs of the six tet edges that share a vertex.
    edge_ind_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
    msdf_tetedges = []
    for i, j in combinations(range(len(edge_ind_list)), 2):
        if set(edge_ind_list[i]) & set(edge_ind_list[j]):
            msdf_tetedges.extend([i, j])
    msdf_tetedges = torch.tensor(msdf_tetedges)
    print(msdf_tetedges)
    print(msdf_tetedges.size())

    # Midpoint of every tet edge, grouped as (tet, edge, xyz). The midpoints
    # are 3-vectors, so the trailing dimension must be 3: the former
    # `.view(-1, 6, 2)` mixed coordinates of different edges before they were
    # reinterpreted as (-1, 12, 2, 3) below.
    # NOTE(review): this changes the saved 'occ_edge_cano_order'; data built
    # with a previously generated tet_info.pt should be regenerated.
    tet_edgenodes_pos = (vertices_discretized[tet_edges[:, 0]] + vertices_discretized[tet_edges[:, 1]]) / 2.0
    tet_edgenodes_pos = tet_edgenodes_pos.view(-1, 6, 3)
    occ_edge_pos = tet_edgenodes_pos[:, msdf_tetedges].view(-1, 12, 2, 3)

    # Order the two endpoints of each edge pair by the sign of their
    # coordinate difference, with x weighted above y above z.
    edge_twopoint_order = torch.sign(occ_edge_pos[:, :, 0, :] - occ_edge_pos[:, :, 1, :])
    axis_weights = torch.tensor([16, 4, 1], device=edge_twopoint_order.device).view(1, 1, -1)
    order_code = (edge_twopoint_order * axis_weights).sum(dim=-1)
    order_code = torch.stack([order_code, -order_code], dim=-1)
    _, edge_twopoint_order = order_code.sort(dim=-1)

    occ_edge_cano_order = torch.arange(2).view(1, 1, 2).expand(occ_edge_pos.size(0), 12, 2).cuda()
    occ_edge_cano_order = torch.gather(
        input=occ_edge_cano_order,
        dim=-1,
        index=edge_twopoint_order
    )

    tet_edges = tet_edges.view(-1)

    torch.save({
        'tet_v_pos': vertices,
        'tet_edge_vpos': vertices[tet_edges].view(-1, 2, 3),
        'tet_edge_pix_loc': vertices_discretized[tet_edges].view(-1, 2, 3),
        'tet_center_loc': tet_center_discretized,
        'msdf_edges': msdf_tetedges.view(12, 2),
        'occ_edge_cano_order': occ_edge_cano_order
    }, 'tet_info.pt')
-------------------------------------------------------------------------------- 1 | torchrun --nnodes=1 --nproc_per_node=8 main_diffusion_ddp.py --mode=train --config=diffusion_configs/config_upper_occgrid_normalized.py \ 2 | --config.training.train_dir=$SAVE_DIR --config.data.root_dir=$REPO_ROOT_DIR 3 | -------------------------------------------------------------------------------- /assets/gshell_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzzcd001/GShell/c2f0ba9ea01a7f1499e309968d386324292e6c92/assets/gshell_logo.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzzcd001/GShell/c2f0ba9ea01a7f1499e309968d386324292e6c92/assets/teaser.png -------------------------------------------------------------------------------- /configs/deepfashion_mc.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [1024, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 24, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 128, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": [1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /configs/deepfashion_mc_256.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": 
"data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [1024, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 24, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 256, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": [1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /configs/deepfashion_mc_512.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [1024, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "validate" : false, 14 | "lock_pos" : false, 15 | "display": [{"latlong" : true}], 16 | "background" : "white", 17 | "denoiser": "bilateral", 18 | "n_samples" : 12, 19 | "env_scale" : 2.0, 20 | "gshell_grid" : 512, 21 | "validate" : true, 22 | "laplace_scale" : 6000, 23 | "boxscale": [1, 1, 1], 24 | "aabb": [-1, -1, -1, 1, 1, 1] 25 | } -------------------------------------------------------------------------------- /configs/deepfashion_mc_80.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [1024, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | 
"ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 24, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 80, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": [1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /configs/nerf_chair.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/nerf_synthetic/chair", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [800, 800], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "gshell_grid" : 128, 11 | "mesh_scale" : 2.1, 12 | "validate" : true, 13 | "n_samples" : 8, 14 | "denoiser" : "bilateral", 15 | "display": [{"latlong" : true}, {"bsdf" : "kd"}, {"bsdf" : "ks"}, {"bsdf" : "normal"}], 16 | "background" : "white", 17 | "boxscale": [1, 1, 1], 18 | "aabb": [-1, -1, -1, 1, 1, 1] 19 | } -------------------------------------------------------------------------------- /configs/polycam_mc.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [768, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 8, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 256, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": 
[1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /configs/polycam_mc_128.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [768, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 8, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 128, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": [1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /configs/polycam_mc_16samples.json: -------------------------------------------------------------------------------- 1 | { 2 | "ref_mesh": "data/spot/spot.obj", 3 | "random_textures": true, 4 | "iter": 5000, 5 | "save_interval": 100, 6 | "texture_res": [ 1024, 1024 ], 7 | "train_res": [768, 1024], 8 | "batch": 2, 9 | "learning_rate": [0.03, 0.005], 10 | "ks_min" : [0, 0.001, 0.0], 11 | "ks_max" : [0, 1.0, 1.0], 12 | "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr", 13 | "lock_pos" : false, 14 | "display": [{"latlong" : true}], 15 | "background" : "white", 16 | "denoiser": "bilateral", 17 | "n_samples" : 16, 18 | "env_scale" : 2.0, 19 | "gshell_grid" : 256, 20 | "validate" : true, 21 | "laplace_scale" : 6000, 22 | "boxscale": [1, 1, 1], 23 | "aabb": [-1, -1, -1, 1, 1, 1] 24 | } -------------------------------------------------------------------------------- /data/tets/generate_tets.py: -------------------------------------------------------------------------------- 1 | # 
def generate_tetrahedron_grid_file(res=32, root='..'):
    """Run Quartet to tetrahedralize the unit cube at resolution ``res``.

    Quartet (https://github.com/crawforddoran/quartet) must already be
    downloaded and compiled in ``<root>/quartet``. Equivalent manual usage:
    ``quartet meshes/cube.obj 0.5 cube_5.tet``. For ``res=32`` this writes
    ``meshes/cube_32_tet.tet`` and ``meshes/cube_boundary_32.obj`` inside
    the Quartet directory.

    Args:
        res: Grid resolution; the cell size passed to Quartet is ``1 / res``.
        root: Directory that contains the ``quartet`` checkout.

    Raises:
        subprocess.CalledProcessError: If Quartet exits with an error.
    """
    import subprocess

    frac = 1.0 / res
    # `%d` keeps the file names in the documented `cube_32_tet.tet` form;
    # `%f` produced `cube_32.000000_tet.tet`.
    subprocess.run(
        [
            './quartet',
            'meshes/cube.obj',
            '%f' % frac,
            'meshes/cube_%d_tet.tet' % res,
            '-s',
            'meshes/cube_boundary_%d.obj' % res,
        ],
        cwd=os.path.join(root, 'quartet'),
        check=True,
    )


def convert_from_quartet_to_npz(quartetfile='cube_32_tet.tet', npzfile='32_tets'):
    """Convert a Quartet ``.tet`` file into a compressed ``.npz`` archive.

    The header line is read as ``<tag> <num_vertices> <num_tets>``, followed
    by one vertex per line and then one tetrahedron per line.

    Args:
        quartetfile: Path of the ``.tet`` file to read.
        npzfile: Output path handed to ``np.savez_compressed``; the archive
            stores the arrays ``vertices`` and ``indices``.
    """
    with open(quartetfile, 'r') as tet_file:
        header = tet_file.readline().split()
    numvertices = int(header[1])
    numtets = int(header[2])
    print(numvertices, numtets)

    # load vertices
    vertices = np.loadtxt(quartetfile, skiprows=1, max_rows=numvertices)
    print(vertices.shape)

    # load indices
    indices = np.loadtxt(quartetfile, dtype=int, skiprows=1 + numvertices, max_rows=numtets)
    print(indices.shape)

    np.savez_compressed(npzfile, vertices=vertices, indices=indices)
/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .dataset import Dataset 10 | from .dataset_mesh import DatasetMesh 11 | from .dataset_nerf import DatasetNERF 12 | from .dataset_llff import DatasetLLFF -------------------------------------------------------------------------------- /dataset/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
class Dataset(torch.utils.data.Dataset):
    """Basic dataset interface.

    Subclasses implement ``__len__`` and ``__getitem__``; ``collate`` merges
    the per-item dicts they return into one batch dict.
    """

    def __init__(self):
        super().__init__()

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, itr):
        # Accept the index so that indexing the base class reports
        # NotImplementedError instead of a signature TypeError.
        raise NotImplementedError

    def collate(self, batch):
        """Merge a list of item dicts into a single batch dict.

        Tensor entries are concatenated along dimension 0. ``resolution``
        and ``spp`` are taken from the first item. Optional entries are
        ``None`` when the first item lacks them (or stores ``None``).

        Args:
            batch: Non-empty list of item dicts.

        Returns:
            Dict with the keys ``mv``, ``mvp``, ``campos``, ``resolution``,
            ``spp``, ``img``, ``img_second``, ``invdepth``,
            ``invdepth_second`` and ``envlight_transform``.
        """
        first = batch[0]

        def _cat(key):
            return torch.cat([item[key] for item in batch], dim=0)

        def _cat_optional(key):
            # Presence is decided on the first item, i.e. on `batch[0]` and
            # not on the list `batch` itself (a list of dicts never contains
            # the key string, which used to drop 'envlight_transform').
            if key not in first or first[key] is None:
                return None
            return _cat(key)

        return {
            'mv': _cat('mv'),
            'mvp': _cat('mvp'),
            'campos': _cat('campos'),
            'resolution': first['resolution'],
            'spp': first['spp'],
            'img': _cat_optional('img'),
            'img_second': _cat_optional('img_second'),
            'invdepth': _cat_optional('invdepth'),
            'invdepth_second': _cat_optional('invdepth_second'),
            'envlight_transform': _cat_optional('envlight_transform'),
        }
# This function is borrowed from IDR: https://github.com/lioryariv/idr
def load_K_Rt_from_P(filename, P=None):
    """Split a 3x4 projection matrix into an intrinsics and a pose matrix.

    Args:
        filename: Text file holding the matrix rows (space separated). Only
            read when ``P`` is None; a 4-line file has its first line dropped.
        P: Optional projection matrix; when given, ``filename`` is ignored.

    Returns:
        ``(intrinsics, pose)``, both 4x4 numpy arrays. ``intrinsics`` holds K
        normalized by ``K[2, 2]``; ``pose`` holds the transposed rotation and
        the de-homogenized translation vector returned by
        ``cv.decomposeProjectionMatrix``.
    """
    if P is None:
        # Context manager so the file handle is released deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose


def _load_img(path):
    """Load an image as a float32 tensor; non-float32 images are scaled by 1/255 and their first three channels converted sRGB -> linear."""
    img = util.load_image_raw(path)
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
        img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img


class DatasetDeepFashion(Dataset):
    """Image dataset read from ``<base_dir>/NNN.png`` with cameras from ``cameras_sphere.npz``.

    Args:
        base_dir: Directory with the images and ``cameras_sphere.npz``.
        FLAGS: Options object; reads ``display_res``, ``cam_near_far``,
            ``local_rank``, ``train_res`` and ``spp``.
        examples: Optional override for the reported dataset length.
        n_images: Number of views to load (defaults to the previously
            hard-coded 72).
    """

    def __init__(self, base_dir, FLAGS, examples=None, n_images=72):
        super().__init__()
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = base_dir

        # Load config / transforms
        self.n_images = n_images

        self.fovy = np.deg2rad(60)
        self.proj_mtx = util.perspective(
            self.fovy,
            self.FLAGS.display_res[1] / self.FLAGS.display_res[0],
            self.FLAGS.cam_near_far[0],
            self.FLAGS.cam_near_far[1],
        )

        camera_dict = np.load(os.path.join(self.base_dir, 'cameras_sphere.npz'))

        # world_mat is a projection matrix from world to image
        self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]

        # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.
        self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]
        self.intrinsics_all = []
        self.pose_all = []

        for scale_mat, world_mat in zip(self.scale_mats_np, self.world_mats_np):
            P = world_mat @ scale_mat
            P = P[:3, :4]
            intrinsics, pose = load_K_Rt_from_P(None, P)
            self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
            self.pose_all.append(torch.from_numpy(pose).float())

        # Determine resolution & aspect ratio
        self.resolution = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(0))).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            # Label fixed: the message previously claimed to come from DatasetNERF.
            print("DatasetDeepFashion: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

    def _parse_frame(self, idx):
        """Load frame ``idx`` and return ``(img, mv, mvp, campos)`` as CUDA tensors with a leading batch dim."""
        # Load image data and modelview matrix
        img = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(idx)))
        # Validate the channel count before the fourth channel is indexed below.
        assert img.size(-1) == 4
        # Multiply the color channels by channel 3 (presumably alpha), then binarize that channel.
        img[:, :, :3] = img[:, :, :3] * img[:, :, 3:]
        img[:, :, 3] = torch.sign(img[:, :, 3])

        # Negates the Y and Z axes of the inverted pose.
        flip_mat = torch.tensor([
            [1,  0,  0, 0],
            [0, -1,  0, 0],
            [0,  0, -1, 0],
            [0,  0,  0, 1]
        ], dtype=torch.float)

        mv = flip_mat @ torch.linalg.inv(self.pose_all[idx])
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp = self.proj_mtx @ mv

        return img[None, ...].cuda(), mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda()  # Add batch dimension

    def __len__(self):
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for iteration ``itr`` (frames wrap around modulo ``n_images``)."""
        iter_res = self.FLAGS.train_res

        img, mv, mvp, campos = self._parse_frame(itr % self.n_images)

        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': iter_res,
            'spp': self.FLAGS.spp,
            'img': img
        }
# This function is borrowed from IDR: https://github.com/lioryariv/idr
def load_K_Rt_from_P(filename, P=None):
    """Split a 3x4 projection matrix into an intrinsics and a pose matrix.

    Args:
        filename: Text file holding the matrix rows (space separated). Only
            read when ``P`` is None; a 4-line file has its first line dropped.
        P: Optional projection matrix; when given, ``filename`` is ignored.

    Returns:
        ``(intrinsics, pose)``, both 4x4 numpy arrays. ``intrinsics`` holds K
        normalized by ``K[2, 2]``; ``pose`` holds the transposed rotation and
        the de-homogenized translation vector returned by
        ``cv.decomposeProjectionMatrix``.
    """
    if P is None:
        # Context manager so the file handle is released deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose


def _load_img(path):
    """Load an image as a float32 tensor; non-float32 images are scaled by 1/255 and their first three channels converted sRGB -> linear."""
    img = util.load_image_raw(path)
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
        img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img


def _load_mask(path):
    """Load a mask as a float32 tensor; non-float32 data is scaled by 1/255 (no color-space conversion)."""
    img = util.load_image_raw(path)
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img


class DatasetDeepFashionTestset(Dataset):
    """Test-view dataset read from ``<base_dir>/NNN.png`` with cameras from ``proj_mtx_all.npy``.

    Args:
        base_dir: Directory with the images and ``proj_mtx_all.npy``.
        FLAGS: Options object; reads ``display_res``, ``cam_near_far``,
            ``local_rank``, ``train_res`` and ``spp``.
        examples: Optional override for the reported dataset length.
        n_images: Number of views reported and indexed (defaults to the
            previously hard-coded 200). Poses are built for every matrix in
            ``proj_mtx_all.npy`` regardless of this value.
    """

    def __init__(self, base_dir, FLAGS, examples=None, n_images=200):
        super().__init__()
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = base_dir

        # Load config / transforms
        self.n_images = n_images

        proj_mtx_all = np.load(os.path.join(self.base_dir, 'proj_mtx_all.npy'))
        self.intrinsics_all = []
        self.pose_all = []

        self.fovy = np.deg2rad(60)
        self.proj_mtx = util.perspective(
            self.fovy,
            self.FLAGS.display_res[1] / self.FLAGS.display_res[0],
            self.FLAGS.cam_near_far[0],
            self.FLAGS.cam_near_far[1],
        )

        for i in range(proj_mtx_all.shape[0]):
            P = proj_mtx_all[i]
            P = P[:3, :4]
            intrinsics, pose = load_K_Rt_from_P(None, P)
            self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
            self.pose_all.append(torch.from_numpy(pose).float())

        # Determine resolution & aspect ratio
        self.resolution = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(0))).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            # Label fixed: the message previously claimed to come from DatasetNERF.
            print("DatasetDeepFashionTestset: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

    def _parse_frame(self, idx):
        """Load frame ``idx`` and return ``(img, mv, mvp, campos)`` as CUDA tensors with a leading batch dim."""
        # Load image data and modelview matrix
        img = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(idx)))
        assert img.size(-1) == 4

        # Negates the Y and Z axes of the inverted pose.
        flip_mat = torch.tensor([
            [1,  0,  0, 0],
            [0, -1,  0, 0],
            [0,  0, -1, 0],
            [0,  0,  0, 1]
        ], dtype=torch.float)

        mv = flip_mat @ torch.linalg.inv(self.pose_all[idx])
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp = self.proj_mtx @ mv

        return img[None, ...].cuda(), mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda()  # Add batch dimension

    def __len__(self):
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for iteration ``itr`` (frames wrap around modulo ``n_images``)."""
        iter_res = self.FLAGS.train_res

        img, mv, mvp, campos = self._parse_frame(itr % self.n_images)

        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': iter_res,
            'spp': self.FLAGS.spp,
            'img': img
        }
# File suffixes (compared case-insensitively) that count as images.
_IMAGE_SUFFIXES = ('png', 'jpg', 'jpeg')


def _list_images(directory):
    """Return the sorted image file paths directly inside ``directory``."""
    return [f for f in sorted(glob.glob(os.path.join(directory, "*"))) if f.lower().endswith(_IMAGE_SUFFIXES)]


def _load_mask(fn):
    """Load a mask as a float32 tensor; a 2-D mask is expanded to three identical channels."""
    img = torch.tensor(util.load_image(fn), dtype=torch.float32)
    if len(img.shape) == 2:
        img = img[..., None].repeat(1, 1, 3)
    return img


def _load_img(fn):
    """Load an image as a float32 tensor; non-float32 images are scaled by 1/255 and their first three channels converted sRGB -> linear."""
    img = util.load_image_raw(fn)
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
        img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img

###############################################################################
# LLFF datasets (real world camera lightfields)
###############################################################################

class DatasetLLFF(Dataset):
    """Dataset over ``<base_dir>/images`` and ``<base_dir>/masks`` with poses from ``poses_bounds.npy``.

    Args:
        base_dir: Dataset root directory.
        FLAGS: Options object; reads ``local_rank``, ``pre_load``,
            ``cam_near_far`` and ``spp``.
        examples: Optional override for the reported dataset length.
    """

    def __init__(self, base_dir, FLAGS, examples=None):
        super().__init__()
        self.FLAGS = FLAGS
        self.base_dir = base_dir
        self.examples = examples

        # Enumerate all image/mask files once; previously every frame load
        # re-globbed and re-sorted both directories.
        self.all_img = _list_images(os.path.join(self.base_dir, "images"))
        self.all_mask = _list_images(os.path.join(self.base_dir, "masks"))
        self.resolution = _load_img(self.all_img[0]).shape[0:2]

        # Load camera poses
        poses_bounds = np.load(os.path.join(self.base_dir, 'poses_bounds.npy'))

        poses = poses_bounds[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
        poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)  # Taken from nerf, swizzles from LLFF to expected coordinate system
        poses = np.moveaxis(poses, -1, 0).astype(np.float32)

        lcol = np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :].repeat(poses.shape[0], 0)
        self.imvs = torch.tensor(np.concatenate((poses[:, :, 0:4], lcol), axis=1), dtype=torch.float32)
        self.aspect = self.resolution[1] / self.resolution[0]  # width / height
        self.fovy = util.focal_length_to_fovy(poses[:, 2, 4], poses[:, 0, 4])

        # Recenter scene so lookat position is origin
        center = util.lines_focal(self.imvs[..., :3, 3], -self.imvs[..., :3, 2])
        self.imvs[..., :3, 3] = self.imvs[..., :3, 3] - center[None, ...]

        if self.FLAGS.local_rank == 0:
            print("DatasetLLFF: %d images with shape [%d, %d]" % (len(self.all_img), self.resolution[0], self.resolution[1]))
            print("DatasetLLFF: auto-centering at %s" % (center.cpu().numpy()))

        # Pre-load from disc to avoid slow png parsing
        if self.FLAGS.pre_load:
            self.preloaded_data = []
            for i in range(self.imvs.shape[0]):
                self.preloaded_data += [self._parse_frame(i)]

    def _parse_frame(self, idx):
        """Load frame ``idx`` and return ``(img, mv, mvp, campos)``, each with a leading batch dim.

        The first mask channel is appended to the image as its last channel.
        """
        all_img = self.all_img
        all_mask = self.all_mask
        assert len(all_img) == self.imvs.shape[0] and len(all_mask) == self.imvs.shape[0]

        # Load image+mask data
        img = _load_img(all_img[idx])
        mask = _load_mask(all_mask[idx])
        img = torch.cat((img, mask[..., 0:1]), dim=-1)

        # Setup transforms
        proj = util.perspective(self.fovy[idx, ...], self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])
        mv = torch.linalg.inv(self.imvs[idx, ...])
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp = proj @ mv

        return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...]  # Add batch dimension

    def __len__(self):
        return self.imvs.shape[0] if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for iteration ``itr`` (frames wrap around modulo the pose count)."""
        if self.FLAGS.pre_load:
            img, mv, mvp, campos = self.preloaded_data[itr % self.imvs.shape[0]]
        else:
            img, mv, mvp, campos = self._parse_frame(itr % self.imvs.shape[0])

        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': self.resolution,
            'spp': self.FLAGS.spp,
            'img': img
        }
###############################################################################
# Reference dataset using mesh & rendering
###############################################################################

class DatasetMesh(Dataset):
    """Dataset that renders a reference mesh on the fly.

    Args:
        ref_mesh: Reference mesh; tangents are computed for it on construction.
        glctx: Rasterizer context forwarded to ``render.render_mesh``.
        cam_radius: Camera distance used for the view translation.
        FLAGS: Options object; reads ``train_res``, ``display_res``,
            ``random_lgt``, ``local_rank``, ``texture_res``, ``envmap``,
            ``env_scale``, ``cam_near_far``, ``spp``, ``iter``, ``batch`` and
            ``layers``.
        validate: When True, yield 50 frames of a smooth rotation instead of
            random views.
        mesh_center: Optional center whose negation is added to the camera
            translation for random views.
    """

    def __init__(self, ref_mesh, glctx, cam_radius, FLAGS, validate=False, mesh_center=None):
        super().__init__()
        # Init
        self.glctx = glctx
        self.cam_radius = cam_radius
        self.FLAGS = FLAGS
        self.validate = validate
        self.fovy = np.deg2rad(45)
        self.aspect = FLAGS.train_res[1] / FLAGS.train_res[0]
        self.random_lgt = FLAGS.random_lgt
        self.camera_lgt = False

        self.mesh_center = mesh_center

        if self.FLAGS.local_rank == 0:
            print("DatasetMesh: ref mesh has %d triangles and %d vertices" % (ref_mesh.t_pos_idx.shape[0], ref_mesh.v_pos.shape[0]))

        # Sanity test training texture resolution
        ref_texture_res = np.maximum(ref_mesh.material['kd'].getRes(), ref_mesh.material['ks'].getRes())
        if 'normal' in ref_mesh.material:
            ref_texture_res = np.maximum(ref_texture_res, ref_mesh.material['normal'].getRes())
        # Parenthesized: ``and`` binds tighter than ``or``, so without the
        # parentheses the warning was printed on every rank whenever the
        # second texture dimension was too small.
        texture_too_small = FLAGS.texture_res[0] < ref_texture_res[0] or FLAGS.texture_res[1] < ref_texture_res[1]
        if self.FLAGS.local_rank == 0 and texture_too_small:
            print("---> WARNING: Picked a texture resolution lower than the reference mesh [%d, %d] < [%d, %d]" % (FLAGS.texture_res[0], FLAGS.texture_res[1], ref_texture_res[0], ref_texture_res[1]))

        # Load environment map texture
        self.envlight = light.load_env(FLAGS.envmap, scale=FLAGS.env_scale)

        self.ref_mesh = mesh.compute_tangents(ref_mesh)

    def _rotate_scene(self, itr):
        """Return ``(mv, mvp, campos, resolution, spp)`` for frame ``itr`` of a 50-frame turntable at display resolution."""
        proj_mtx = util.perspective(self.fovy, self.FLAGS.display_res[1] / self.FLAGS.display_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # Smooth rotation for display.
        ang = (itr / 50) * np.pi * 2
        mv = util.translate(0, 0, -self.cam_radius) @ (util.rotate_x(-0.4) @ util.rotate_y(ang))
        mvp = proj_mtx @ mv
        campos = torch.linalg.inv(mv)[:3, 3]

        return mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda(), self.FLAGS.display_res, self.FLAGS.spp

    def _random_scene(self):
        """Return ``(mv, mvp, campos, resolution, spp)`` for a random view at training resolution."""
        # ==============================================================================================
        #  Setup projection matrix
        # ==============================================================================================
        iter_res = self.FLAGS.train_res
        proj_mtx = util.perspective(self.fovy, iter_res[1] / iter_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # ==============================================================================================
        #  Random camera & light position
        # ==============================================================================================

        # Random rotation/translation matrix for optimization.
        if self.mesh_center is not None:
            mv = (
                util.translate(-self.mesh_center[0], -self.mesh_center[1], -self.mesh_center[2] - self.cam_radius)
                @ util.random_rotation_translation(0.25)
            )
        else:
            mv = util.translate(0, 0, -self.cam_radius) @ util.random_rotation_translation(0.25)
        mvp = proj_mtx @ mv
        campos = torch.linalg.inv(mv)[:3, 3]

        return mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda(), iter_res, self.FLAGS.spp  # Add batch dimension

    def __len__(self):
        return 50 if self.validate else (self.FLAGS.iter + 1) * self.FLAGS.batch

    def __getitem__(self, itr):
        """Render the reference mesh for iteration ``itr`` and return the sample dict.

        'envlight_transform' is a random rotation when ``random_lgt`` is set,
        a copy of the model-view matrix when ``camera_lgt`` is set, and None
        otherwise (always None in validation mode).
        """
        # ==============================================================================================
        #  Randomize scene parameters
        # ==============================================================================================

        if self.validate:
            mv, mvp, campos, iter_res, iter_spp = self._rotate_scene(itr)
            camera_mv = None
        else:
            mv, mvp, campos, iter_res, iter_spp = self._random_scene()
            if self.random_lgt:
                rnd_rot = util.random_rotation()
                camera_mv = rnd_rot.unsqueeze(0).clone()
            elif self.camera_lgt:
                camera_mv = mv.clone()
            else:
                camera_mv = None

        with torch.no_grad():
            rendered = render.render_mesh(self.glctx, self.ref_mesh, mvp, campos, self.envlight, iter_res, spp=iter_spp,
                                          num_layers=self.FLAGS.layers, msaa=True, background=None, shade_data=True)
        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': iter_res,
            'spp': iter_spp,
            'img': rendered['shaded'],
            'img_second': rendered['shaded_second'],
            'invdepth': rendered['invdepth'],
            'invdepth_second': rendered['invdepth_second'],
            'envlight_transform': camera_mv
        }
###############################################################################
# NERF image based dataset (synthetic)
###############################################################################

def _load_img(path):
    """Load the image whose path is ``path`` plus any extension, as a float32 tensor.

    Non-float32 images are scaled by 1/255 and their first three channels
    converted sRGB -> linear.
    """
    # Sorted so the chosen file is deterministic when several extensions
    # match; glob order is otherwise arbitrary.
    files = sorted(glob.glob(path + '.*'))
    assert len(files) > 0, "Tried to find image file for: %s, but found 0 files" % (path)
    img = util.load_image_raw(files[0])
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
        img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img


class DatasetNERF(Dataset):
    """Dataset driven by a JSON config with a ``frames`` list and a global ``camera_angle_x``.

    Args:
        cfg_path: Path to the JSON config; image paths are resolved relative
            to its directory.
        FLAGS: Options object; reads ``local_rank``, ``pre_load``,
            ``cam_near_far``, ``train_res`` and ``spp``.
        examples: Optional override for the reported dataset length.
    """

    def __init__(self, cfg_path, FLAGS, examples=None):
        super().__init__()
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = os.path.dirname(cfg_path)

        # Load config / transforms (context manager closes the file handle)
        with open(cfg_path, 'r') as f:
            self.cfg = json.load(f)
        self.n_images = len(self.cfg['frames'])

        # Determine resolution & aspect ratio
        self.resolution = _load_img(os.path.join(self.base_dir, self.cfg['frames'][0]['file_path'])).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

        # Pre-load from disc to avoid slow png parsing
        if self.FLAGS.pre_load:
            self.preloaded_data = []
            for i in range(self.n_images):
                self.preloaded_data += [self._parse_frame(self.cfg, i)]

    def _parse_frame(self, cfg, idx):
        """Load frame ``idx`` of ``cfg`` and return ``(img, mv, mvp, campos)``, each with a leading batch dim."""
        # Config projection matrix (static, so could be precomputed)
        fovy = util.fovx_to_fovy(cfg['camera_angle_x'], self.aspect)
        proj = util.perspective(fovy, self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # Load image data and modelview matrix
        img = _load_img(os.path.join(self.base_dir, cfg['frames'][idx]['file_path']))
        mv = torch.linalg.inv(torch.tensor(cfg['frames'][idx]['transform_matrix'], dtype=torch.float32))
        mv = mv @ util.rotate_x(-np.pi / 2)
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp = proj @ mv

        return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...]  # Add batch dimension

    def __len__(self):
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for iteration ``itr`` (frames wrap around modulo ``n_images``)."""
        iter_res = self.FLAGS.train_res

        if self.FLAGS.pre_load:
            img, mv, mvp, campos = self.preloaded_data[itr % self.n_images]
        else:
            img, mv, mvp, campos = self._parse_frame(self.cfg, itr % self.n_images)

        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': iter_res,
            'spp': self.FLAGS.spp,
            'img': img
        }
###############################################################################
# NERF image based dataset (synthetic)
###############################################################################

def _load_img(path):
    """Load an image as a float32 tensor; non-float32 images are scaled by 1/255 and their first three channels converted sRGB -> linear."""
    img = util.load_image_raw(path)
    if img.dtype != np.float32:  # LDR image
        img = torch.tensor(img / 255, dtype=torch.float32)
        img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
    else:
        img = torch.tensor(img, dtype=torch.float32)
    return img


class DatasetNERF(Dataset):
    """Dataset driven by a JSON config whose frames each carry their own ``camera_angle_x``.

    A mask is loaded for every frame from the image path with ``/image/``
    replaced by ``/mask/`` and ``.jpg`` by ``.png``; its first channel is
    appended to the image as the last channel.

    Args:
        cfg_path: Path to the JSON config; image paths are resolved relative
            to its directory.
        FLAGS: Options object; reads ``local_rank``, ``pre_load``,
            ``cam_near_far``, ``train_res`` and ``spp``.
        examples: Optional override for the reported dataset length.
    """

    def __init__(self, cfg_path, FLAGS, examples=None):
        super().__init__()
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = os.path.dirname(cfg_path)

        # Load config / transforms (context manager closes the file handle)
        with open(cfg_path, 'r') as f:
            self.cfg = json.load(f)
        self.n_images = len(self.cfg['frames'])

        # Determine resolution & aspect ratio
        self.resolution = _load_img(os.path.join(self.base_dir, self.cfg['frames'][0]['file_path'])).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

        # Pre-load from disc to avoid slow png parsing
        if self.FLAGS.pre_load:
            self.preloaded_data = []
            for i in range(self.n_images):
                self.preloaded_data += [self._parse_frame(self.cfg, i)]

    def _parse_frame(self, cfg, idx):
        """Load frame ``idx`` of ``cfg`` and return ``(img, mv, mvp, campos)``, each with a leading batch dim."""
        # Config projection matrix (per frame in this variant)
        fovy = util.fovx_to_fovy(cfg['frames'][idx]['camera_angle_x'], self.aspect)
        proj = util.perspective(fovy, self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # Load image data and modelview matrix
        img_path = os.path.join(self.base_dir, cfg['frames'][idx]['file_path'])
        img = _load_img(img_path)
        mask = _load_img(img_path.replace('/image/', '/mask/').replace('.jpg', '.png'))
        img = torch.cat([img, mask[:, :, :1]], dim=-1)
        mv = torch.linalg.inv(torch.tensor(cfg['frames'][idx]['transform_matrix'], dtype=torch.float32))
        mv = mv @ util.rotate_x(-np.pi / 2)
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp = proj @ mv

        return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...]  # Add batch dimension

    def __len__(self):
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for iteration ``itr`` (frames wrap around modulo ``n_images``)."""
        iter_res = self.FLAGS.train_res

        if self.FLAGS.pre_load:
            img, mv, mvp, campos = self.preloaded_data[itr % self.n_images]
        else:
            img, mv, mvp, campos = self._parse_frame(self.cfg, itr % self.n_images)

        return {
            'mv': mv,
            'mvp': mvp,
            'campos': campos,
            'resolution': iter_res,
            'spp': self.FLAGS.spp,
            'img': img
        }
###############################################################################
# Bilateral denoiser
#
# Loosely based on SVGF, but removing temporal components and variance stopping guides.
# https://research.nvidia.com/publication/2017-07_spatiotemporal-variance-guided-filtering-real-time-reconstruction-path-traced
###############################################################################

class BilateralDenoiser(torch.nn.Module):
    """Wrapper module around ``ou.bilateral_denoiser`` with an adjustable filter width."""

    def __init__(self, influence=1.0):
        super(BilateralDenoiser, self).__init__()
        self.set_influence(influence)

    def set_influence(self, factor):
        """Set ``sigma`` to ``2 * factor`` (floored at 1e-4) and derive ``variance`` and the odd size ``N``."""
        self.sigma = max(factor * 2, 0.0001)
        self.variance = self.sigma**2.
        self.N = 2 * math.ceil(self.sigma * 2.5) + 1

    def forward(self, input):
        """Denoise channels 0:3 of ``input`` using channels 3:6 (normalized) and 6:8 as guides."""
        col = input[..., 0:3]
        nrm = util.safe_normalize(input[..., 3:6])  # Bent normals can produce normals of length < 1 here
        zdz = input[..., 6:8]
        return ou.bilateral_denoiser(col, nrm, zdz, self.sigma)


class Embedding(nn.Module):
    def __init__(self, in_channels, N_freqs, logscale=True):
        """
        Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
        in_channels: number of input channels (3 for both xyz and direction)
        N_freqs: number of frequency bands
        logscale: if True the bands are 2^0 .. 2^(N_freqs-1); otherwise they
                  are spaced linearly between 1 and 2^(N_freqs-1)
        """
        super(Embedding, self).__init__()
        self.N_freqs = N_freqs
        self.in_channels = in_channels
        self.funcs = [torch.sin, torch.cos]
        self.out_channels = in_channels * (len(self.funcs) * N_freqs + 1)

        if logscale:
            self.freq_bands = 2**torch.linspace(0, N_freqs - 1, N_freqs)
        else:
            self.freq_bands = torch.linspace(1, 2**(N_freqs - 1), N_freqs)

    def forward(self, x):
        """
        Embeds x to (x, sin(2^k x), cos(2^k x), ...)
        Different from the paper, "x" is also in the output
        See https://github.com/bmild/nerf/issues/12

        Inputs:
            x: (B, self.in_channels)

        Outputs:
            out: (B, self.out_channels)
        """
        out = [x]
        for freq in self.freq_bands:
            for func in self.funcs:
                out += [func(freq * x)]

        return torch.cat(out, -1)


class MLP(nn.Module):
    """Softplus MLP applied to a frequency embedding of 3-channel input.

    Args:
        n_freq: Number of embedding frequency bands.
        d_hidden: Width of the hidden layers.
        d_out: Output width.
        n_hidden: Number of hidden Linear+Softplus pairs after the input layer.
        skip_in: Indices (0-based, among the hidden layers) whose Linear layer
            also receives the embedding concatenated to its input. Defaults to
            no skip connections.
        use_float16: Run the layers under CUDA float16 autocast.
    """

    def __init__(self, n_freq=6, d_hidden=128, d_out=1, n_hidden=3, skip_in=None, use_float16=False):
        super().__init__()
        # A fresh list per instance; the previous ``skip_in=[]`` default was a
        # single list object shared by every MLP created without the argument.
        if skip_in is None:
            skip_in = []
        self.emb = Embedding(3, n_freq)
        layers = [
            nn.Linear(self.emb.out_channels, d_hidden),
            nn.Softplus(beta=100)
        ]
        # ``count`` tracks the index the next appended layer will get, so
        # ``skip_count`` ends up holding the positions of the skip layers.
        count = 2
        self.skip_count = []
        self.skip_in = skip_in
        for i in range(n_hidden):
            if i in skip_in:
                layers.append(nn.Linear(d_hidden + self.emb.out_channels, d_hidden))
                self.skip_count.append(count)
            else:
                layers.append(nn.Linear(d_hidden, d_hidden))
            count += 1
            layers.append(nn.Softplus(beta=100))
            count += 1
        layers.append(nn.Linear(d_hidden, d_out))
        count += 1
        self.net = nn.ModuleList(layers)
        self.use_float16 = use_float16

    def forward(self, x):
        """Embed ``x`` and run it through the layers, re-injecting the embedding at the skip layers."""
        emb = self.emb(x)
        x = emb
        with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16):
            for i, module in enumerate(self.net):
                if i in self.skip_count:
                    x = module(torch.cat([x, emb], dim=-1))
                else:
                    x = module(x)
        return x
######################################################################################
# Monte-carlo sampled environment light with PDF / CDF computation
######################################################################################

class EnvironmentLight:
    """Environment light backed by a single image tensor ``base``.

    Alongside the image the object keeps a normalised sampling PDF
    (``_pdf``) and the cumulative tables derived from it (``cols``, ``rows``).
    """

    LIGHT_MIN_RES = 16

    MIN_ROUGHNESS = 0.08
    MAX_ROUGHNESS = 0.5

    def __init__(self, base):
        self.mtx = None
        self.base = base

        height, width = self.base.shape[0], self.base.shape[1]
        self.pdf_scale = (height * width) / (2 * np.pi * np.pi)
        self.update_pdf()

    def xfm(self, mtx):
        """Store the transform matrix associated with this light."""
        self.mtx = mtx

    def parameters(self):
        """Return the tensors an optimiser should update (only ``base``)."""
        return [self.base]

    def clone(self):
        """Return a new light built from a detached copy of ``base``."""
        detached = self.base.clone().detach()
        return EnvironmentLight(detached)

    def clamp_(self, min=None, max=None):
        """Clamp ``base`` in place to ``[min, max]``."""
        self.base.clamp_(min, max)

    def update_pdf(self):
        """Recompute ``_pdf``, ``cols`` and ``rows`` from the current ``base``."""
        with torch.no_grad():
            # Per-texel weight: brightest channel, scaled by sin(theta) for the
            # lat-long parameterisation,
            # https://cs184.eecs.berkeley.edu/sp18/article/25
            # NOTE(review): Y is taken from channel 1 of util.pixel_grid —
            # presumably the vertical coordinate in [0, 1]; confirm.
            Y = util.pixel_grid(self.base.shape[1], self.base.shape[0])[..., 1]
            brightest = torch.max(self.base, dim=-1)[0]
            self._pdf = brightest * torch.sin(Y * np.pi)
            self._pdf = self._pdf / torch.sum(self._pdf)

            # Running sums along each row, then down the per-row totals.
            self.cols = torch.cumsum(self._pdf, dim=1)
            row_totals = self.cols[:, -1:].repeat([1, self.cols.shape[1]])
            self.rows = torch.cumsum(row_totals, dim=0)

            # Normalise by the final entry; divide by 1 where that entry is
            # not positive so empty rows/columns do not produce NaNs.
            col_last = self.cols[:, -1:]
            self.cols = self.cols / torch.where(col_last > 0, col_last, torch.ones_like(self.cols))
            row_last = self.rows[-1:, :]
            self.rows = self.rows / torch.where(row_last > 0, row_last, torch.ones_like(self.rows))

    @torch.no_grad()
    def generate_image(self, res):
        """Resample ``base`` with linear filtering at the resolution ``res``."""
        texcoord = util.pixel_grid(res[1], res[0])
        source = self.base[None, ...].contiguous()
        coords = texcoord[None, ...].contiguous()
        return dr.texture(source, coords, filter_mode='linear')[0]
######################################################################################
# Load and store
######################################################################################

@torch.no_grad()
def _load_env_hdr(fn, scale=1.0, res=None, trainable=False):
    """Load an image file into an ``EnvironmentLight`` on the CUDA device.

    Args:
        fn: path handed to ``util.load_image``.
        scale: multiplier applied to the loaded image.
        res: optional ``[height, width]``; when given the image is resampled
            with linear filtering and clamped to a minimum of 0.0001.
        trainable: when true the light's base tensor requires gradients.
    """
    latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')*scale

    if res is not None:
        texcoord = util.pixel_grid(res[1], res[0])
        latlong_img = torch.clamp(dr.texture(latlong_img[None, ...], texcoord[None, ...], filter_mode='linear')[0], min=0.0001)

    print("EnvProbe,", latlong_img.shape, ", min/max", torch.min(latlong_img).item(), torch.max(latlong_img).item())
    if trainable:
        print("trainable light loaded")
        return EnvironmentLight(base=latlong_img.clone().detach().requires_grad_(True))
    return EnvironmentLight(base=latlong_img)

@torch.no_grad()
def load_env(fn, scale=1.0, res=None, trainable=False):
    """Load an environment light, dispatching on the file extension.

    Only ``.hdr`` (case-insensitive) is supported.

    Raises:
        AssertionError: for any other extension.
    """
    ext = os.path.splitext(fn)[1]
    if ext.lower() == ".hdr":
        return _load_env_hdr(fn, scale, res, trainable=trainable)
    # Raised explicitly instead of ``assert False``: an assert statement is
    # removed under ``python -O`` and the function would silently return None.
    raise AssertionError("Unknown envlight extension %s" % ext)

@torch.no_grad()
def save_env_map(fn, light):
    """Render ``light`` to a 512x1024 image and write it to ``fn``."""
    assert isinstance(light, EnvironmentLight)
    color = light.generate_image([512, 1024])
    util.save_image_raw(fn, color.detach().cpu().numpy())

######################################################################################
# Create trainable with random initialization
######################################################################################

def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
    """Create a trainable ``base_res`` x ``base_res`` x 3 light.

    Texels are drawn uniformly from ``[bias, bias + scale)``.
    """
    base = torch.rand(base_res, base_res, 3, dtype=torch.float32, device='cuda') * scale + bias
    return EnvironmentLight(base.clone().detach().requires_grad_(True))
######################################################################################
# .mtl material format loading / storing
######################################################################################

def load_mtl(fn, clear_ks=True):
    """Parse a ``.mtl`` file into a list of material dictionaries.

    Each ``newmtl`` statement starts a new material. Statements whose keyword
    contains ``bsdf``, ``map_kd``, ``map_ks`` or ``bump`` keep their first
    argument as a string; every other statement is stored as a float32 CUDA
    tensor of its arguments. Statements before the first ``newmtl`` are
    ignored. Afterwards ``kd``/``ks`` are turned into textures (loaded from
    the referenced file, or wrapped constants), ``kd`` is converted from sRGB
    to linear RGB, and a ``bump`` map becomes ``normal``.

    Args:
        fn: path of the ``.mtl`` file; texture paths are resolved relative to
            its directory.
        clear_ks: zero channel 0 of every ``ks`` mip level.

    Returns:
        List of material dicts, in file order.
    """
    import re
    mtl_path = os.path.dirname(fn)

    # Read file
    with open(fn, 'r') as f:
        lines = f.readlines()

    # Parse materials
    materials = []
    for line in lines:
        stripped = line.strip()
        # Blank lines carry no statement and '#' starts a comment in the MTL
        # format. Without this guard a comment after the first ``newmtl``
        # reaches float() below and raises ValueError.
        if not stripped or stripped.startswith('#'):
            continue
        split_line = re.split(' +|\t+|\n+', stripped)
        prefix = split_line[0].lower()
        data = split_line[1:]
        if 'newmtl' in prefix:
            material = {'name' : data[0]}
            materials += [material]
        elif materials:
            if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
                material[prefix] = data[0]
            else:
                material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')

    # Convert everything to textures. Our code expects 'kd' and 'ks' to be
    # texture maps, so constants are replaced with 1x1 maps.
    for mat in materials:
        if 'bsdf' not in mat:
            mat['bsdf'] = 'pbr'

        if 'map_kd' in mat:
            mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
        else:
            mat['kd'] = texture.Texture2D(mat['kd'])

        if 'map_ks' in mat:
            mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
        else:
            mat['ks'] = texture.Texture2D(mat['ks'])

        if 'bump' in mat:
            # Stored values are remapped from [0, 1] to [-1, 1].
            mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)

        # Convert Kd from sRGB to linear RGB
        mat['kd'] = texture.srgb_to_rgb(mat['kd'])

        if clear_ks:
            # Override ORM occlusion (red) channel by zeros. We hijack this channel
            for mip in mat['ks'].getMips():
                mip[..., 0] = 0.0

    return materials

def save_mtl(fn, material):
    """Write ``material`` as a single-material ``.mtl`` file at ``fn``.

    When ``material`` is not None its ``kd``/``ks``/``normal`` textures are
    saved next to ``fn`` as ``texture_kd.png``, ``texture_ks.png`` and
    ``texture_n.png`` (``kd`` converted to sRGB, normals normalised and
    remapped to [0, 1]). When it is None a fixed default material is written.
    """
    folder = os.path.dirname(fn)
    with open(fn, "w") as f:
        f.write('newmtl defaultMat\n')
        if material is not None:
            f.write('bsdf %s\n' % material['bsdf'])
            if 'kd' in material.keys():
                f.write('map_Kd texture_kd.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
            if 'ks' in material.keys():
                f.write('map_Ks texture_ks.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
            if 'normal' in material.keys():
                f.write('bump texture_n.png\n')
                texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(util.safe_normalize(x)+1)*0.5)
        else:
            f.write('Kd 1 1 1\n')
            f.write('Ks 0 0 0\n')
            f.write('Ka 0 0 0\n')
            f.write('Tf 1 1 1\n')
            f.write('Ni 1\n')
            f.write('Ns 0\n')
######################################################################################
# Utility function to convert an existing material and make all textures trainable
######################################################################################

def create_trainable(material):
    """Return a shallow copy of ``material`` whose ``Texture2D`` values are trainable."""
    result = material.copy()
    for key, val in result.items():
        if isinstance(val, texture.Texture2D):
            result[key] = texture.create_trainable(val)
    return result

def get_parameters(material):
    """Collect the parameters of every texture entry in ``material``."""
    trainable = []
    for key, val in material.items():
        if isinstance(val, (texture.Texture2D, mlptexture.MLPTexture3D)):
            trainable += val.parameters()
    return trainable

######################################################################################
# Merge multiple materials into a single uber-material
######################################################################################

def _upscale_replicate(x, full_res):
    """Pad an NHWC tensor on the bottom/right up to ``full_res`` = (H, W).

    Padding replicates the edge texels; the result is contiguous NHWC.
    """
    x = x.permute(0, 3, 1, 2)
    x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
    return x.permute(0, 2, 3, 1).contiguous()

def merge_materials(materials, texcoords, tfaces, mfaces):
    """Pack several materials into one atlas material and remap texcoords.

    All textures are scaled to the largest resolution found, laid out side by
    side horizontally, and padded up to a power-of-two size. Texture
    coordinates are shifted by the material index and rescaled so that they
    address the right tile of the atlas.

    Args:
        materials: non-empty list of material dicts sharing the same ``bsdf``
            and agreeing on whether ``normal`` is present.
        texcoords: indexable of (u, v) pairs.
        tfaces: per-face triples of texcoord indices; rewritten IN PLACE to
            index into the returned vertex list.
        mfaces: per-face material index.

    Returns:
        ``(uber_material, new_tverts_data, tfaces)``.
    """
    assert len(materials) > 0
    for mat in materials:
        assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
        assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"

    uber_material = {
        'name' : 'uber_material',
        'bsdf' : materials[0]['bsdf'],
    }

    textures = ['kd', 'ks', 'normal']

    # Find maximum texture resolution across all materials and textures
    max_res = None
    for mat in materials:
        for tex in textures:
            tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
            max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res

    # Compute size of compound texture and round up to nearest PoT.
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the dtype it
    # used to alias.
    full_res = 2 ** (np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int))

    # Normalize texture resolution across all materials & combine into a single large texture
    for tex in textures:
        if tex in materials[0]:
            # Lay out all textures horizontally, NHWC so dim2 is x
            tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2)
            tex_data = _upscale_replicate(tex_data, full_res)
            uber_material[tex] = texture.Texture2D(tex_data)

    # Compute scaling values for used / unused texture area
    s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]

    # Recompute texture coordinates to coincide with new composite texture
    new_tverts = {}        # original texcoord index -> {material index -> new index}
    new_tverts_data = []
    for fi in range(len(tfaces)):
        matIdx = mfaces[fi]
        for vi in range(3):
            ti = tfaces[fi][vi]
            if ti not in new_tverts:
                new_tverts[ti] = {}
            if matIdx not in new_tverts[ti]:  # create new vertex
                # Offset texture coordinate (x direction) by material id & scale
                # to local space. Note, texcoords are (u,v) but texture is
                # stored (w,h) so the indexes swap here
                new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]])
                new_tverts[ti][matIdx] = len(new_tverts_data) - 1
            tfaces[fi][vi] = new_tverts[ti][matIdx]  # reindex vertex

    return uber_material, new_tverts_data, tfaces
#######################################################################################################################################################
# Small MLP using PyTorch primitives, internal helper class
#######################################################################################################################################################

class _MLP(torch.nn.Module):
    """Bias-free ReLU MLP that lives on the CUDA device.

    ``cfg`` supplies ``n_input_dims``, ``n_neurons``, ``n_hidden_layers`` and
    ``n_output_dims``. When ``loss_scale`` differs from 1.0 a full backward
    hook multiplies the gradient flowing out of the network's input by it.
    """

    def __init__(self, cfg, loss_scale=1.0):
        super(_MLP, self).__init__()
        self.loss_scale = loss_scale

        width = cfg['n_neurons']
        stack = [torch.nn.Linear(cfg['n_input_dims'], width, bias=False), torch.nn.ReLU()]
        for _ in range(cfg['n_hidden_layers'] - 1):
            stack.append(torch.nn.Linear(width, width, bias=False))
            stack.append(torch.nn.ReLU())
        stack.append(torch.nn.Linear(width, cfg['n_output_dims'], bias=False))
        self.net = torch.nn.Sequential(*stack).cuda()

        self.net.apply(self._init_weights)

        if self.loss_scale != 1.0:
            def _scale_input_grad(module, grad_i, grad_o):
                return (grad_i[0] * self.loss_scale, )
            self.net.register_full_backward_hook(_scale_input_grad)

    def forward(self, x):
        """Run the network on ``x`` converted to float32."""
        as_float = x.to(torch.float32)
        return self.net(as_float)

    @staticmethod
    def _init_weights(m):
        """Kaiming-uniform init for exact ``Linear`` modules; zero any bias."""
        if type(m) is not torch.nn.Linear:
            return
        torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        # ``bias`` is None for the bias-free layers built above.
        if hasattr(m.bias, 'data'):
            m.bias.data.fill_(0.0)
#######################################################################################################################################################
# Outward visible MLP class
#######################################################################################################################################################

class MLPTexture3D(torch.nn.Module):
    """3D texture represented by a hash-grid encoding followed by a small MLP.

    Args:
        AABB: pair ``(min_corner, max_corner)`` used to normalise sample
            positions to the unit cube.
        channels: number of output channels.
        internal_dims: width of the MLP's hidden layers.
        hidden: number of hidden layers of the MLP.
        min_max: optional pair ``(lower, upper)`` of per-channel bounds that
            the sigmoid output is scaled into. ``None`` leaves the output in
            the sigmoid's native (0, 1) range.
        use_float16: evaluate the MLP under CUDA float16 autocast.
    """

    def __init__(self, AABB, channels = 3, internal_dims = 32, hidden = 2, min_max = None, use_float16=False):
        super(MLPTexture3D, self).__init__()

        self.channels = channels
        self.internal_dims = internal_dims
        self.AABB = AABB
        self.min_max = min_max
        self.use_float16 = use_float16

        # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details
        desired_resolution = 4096
        base_grid_resolution = 16
        num_levels = 16
        # Geometric growth factor taking the grid from the base resolution to
        # the desired resolution over ``num_levels`` levels.
        per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1))

        enc_cfg = {
            "otype": "HashGrid",
            "n_levels": num_levels,
            "n_features_per_level": 2,
            "log2_hashmap_size": 19,
            "base_resolution": base_grid_resolution,
            "per_level_scale" : per_level_scale
        }

        # The encoder hook divides by this factor and the MLP is constructed
        # with the same factor as its loss scale.
        gradient_scaling = 128.0
        self.encoder = tcnn.Encoding(3, enc_cfg)
        self.encoder.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] / gradient_scaling, ))

        # Setup MLP
        mlp_cfg = {
            "n_input_dims" : self.encoder.n_output_dims,
            "n_output_dims" : self.channels,
            "n_hidden_layers" : hidden,
            "n_neurons" : self.internal_dims
        }
        self.net = _MLP(mlp_cfg, gradient_scaling)
        print("Encoder output: %d dims" % (self.encoder.n_output_dims))

    # Sample texture at a given location
    def sample(self, texc):
        """Evaluate the texture at positions ``texc`` (last dimension of size 3).

        Positions are normalised by the AABB and clamped to [0, 1] before
        encoding. The result has ``texc``'s leading dimensions and
        ``self.channels`` as the last one.
        """
        _texc = (texc.view(-1, 3) - self.AABB[0][None, ...]) / (self.AABB[1][None, ...] - self.AABB[0][None, ...])
        _texc = torch.clamp(_texc, min=0, max=1)

        p_enc = self.encoder(_texc.contiguous())
        with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16):
            out = self.net.forward(p_enc)

        # Sigmoid limit and scale to the allowed range
        out = torch.sigmoid(out)
        if self.min_max is not None:
            # Without bounds (the constructor default) the plain sigmoid range
            # is kept instead of failing on ``None[1]``.
            out = out * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :]

        return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, c]

    # In-place clamp with no derivative to make sure values are in valid range after training
    def clamp_(self):
        """No-op: the sigmoid in ``sample`` already bounds the output."""
        pass

    def cleanup(self):
        """Release tiny-cuda-nn's temporary memory."""
        tcnn.free_temporary_memory()
// ---- render/optixutils/c_src/common.h ----

#pragma once

// Helper functions to do broadcast guarded fetches.
// Each index level falls back to element 0 when that dimension has size 1, so
// a tensor with singleton dimensions can be read with full-size indices.
#if defined(__CUDACC__)
template<typename T, typename U, typename... Args>
static __device__ inline float3 fetch3(const T &tensor, U idx, Args... args) {
    return tensor.size(0) == 1 ? fetch3(tensor[0], args...) : fetch3(tensor[idx], args...);
}
// Innermost level: a single channel is replicated to all three components.
template<typename T> static __device__ inline float3 fetch3(const T &tensor) {
    return tensor.size(0) == 1 ? make_float3(tensor[0], tensor[0], tensor[0]) : make_float3(tensor[0], tensor[1], tensor[2]);
}

template<typename T, typename U, typename... Args>
static __device__ inline float2 fetch2(const T &tensor, U idx, Args... args) {
    return tensor.size(0) == 1 ? fetch2(tensor[0], args...) : fetch2(tensor[idx], args...);
}
// Innermost level: a single channel is replicated to both components.
template<typename T> static __device__ inline float2 fetch2(const T &tensor) {
    return tensor.size(0) == 1 ? make_float2(tensor[0], tensor[0]) : make_float2(tensor[0], tensor[1]);
}

#include "math_utils.h"
#include "bsdf.h"
#endif

//------------------------------------------------------------------------------
// CUDA error-checking macros
//------------------------------------------------------------------------------

// NOTE(review): CUDA_CHECK, OPTIX_CHECK and OPTIX_CHECK_LOG format a message
// into a local stringstream and then drop it - a failing call is neither
// reported nor propagated. Only NVRTC_CHECK_ERROR throws. Behaviour is left
// unchanged here because callers are not visible; confirm whether failures
// should be surfaced.
#define CUDA_CHECK( call )                                                     \
    do                                                                         \
    {                                                                          \
        cudaError_t error = call;                                              \
        if( error != cudaSuccess )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "CUDA call (" << #call << " ) failed with error: '"          \
               << cudaGetErrorString( error )                                  \
               << "' (" __FILE__ << ":" << __LINE__ << ")\n";                  \
        }                                                                      \
    } while( 0 )


#define OPTIX_CHECK( call )                                                    \
    do                                                                         \
    {                                                                          \
        OptixResult res = call;                                                \
        if( res != OPTIX_SUCCESS )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "Optix call '" << #call << "' failed: " __FILE__ ":"         \
               << __LINE__ << ")\n";                                           \
        }                                                                      \
    } while( 0 )

// Expects `log` (a char array) and `sizeof_log` to be in scope at the call site.
#define OPTIX_CHECK_LOG( call )                                                \
    do                                                                         \
    {                                                                          \
        OptixResult res = call;                                                \
        const size_t sizeof_log_returned = sizeof_log;                         \
        sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */    \
        if( res != OPTIX_SUCCESS )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "Optix call '" << #call << "' failed: " __FILE__ ":"         \
               << __LINE__ << ")\nLog:\n" << log                               \
               << ( sizeof_log_returned > sizeof( log ) ? "<TRUNCATED>" : "" ) \
               << "\n";                                                        \
        }                                                                      \
    } while( 0 )

#define NVRTC_CHECK_ERROR( func )                                                                       \
    do                                                                                                  \
    {                                                                                                   \
        nvrtcResult code = func;                                                                        \
        if( code != NVRTC_SUCCESS )                                                                     \
            throw std::runtime_error( "ERROR: " __FILE__ "(): " + std::string( nvrtcGetErrorString( code ) ) ); \
    } while( 0 )

// ---- render/optixutils/c_src/denoising.cu ----
// Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.

#include "common.h"
#include "denoising.h"

#define FLT_EPS 0.0001f

// Forward pass of the bilateral filter.
// One thread per output texel (idx.z, idx.y, idx.x). For every tap inside the
// square window the weight is the product of a spatial Gaussian, a normal
// similarity term and a depth similarity term. The kernel writes the weighted
// colour SUM to out[..., 0..2] and the accumulated weight to out[..., 3]; it
// does not divide by the weight itself.
__global__ void bilateral_denoiser_fwd_kernel(BilateralDenoiserParams params)
{
    uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);

    if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
        return;

    // Fetch central tap
    float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
    float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);

    float variance = params.sigma * params.sigma;
    int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;

    float accum_w = 0.0f;
    float3 accum_col = make_float3(0.0f);
    for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
    {
        for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
        {
            // Compute tap coordinates, used for input activations and bilateral guides
            int32_t y = idx.y + fy;
            int32_t x = idx.x + fx;

            // Taps outside the image contribute nothing.
            if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
                continue;

            // Fetch current tap
            float3 t_col = fetch3(params.col, idx.z, y, x);
            float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
            float2 t_zdz = fetch2(params.zdz, idx.z, y, x);

            /////////////////////////////////////////////////////////
            // Compute bilateral weight
            /////////////////////////////////////////////////////////

            // Distance
            float dist_sqr = fx * fx + fy * fy;
            float dist = sqrtf(dist_sqr);
            float w_xy = expf(-dist_sqr / (2.0f * variance));

            // Normal
            float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);

            // Depth: difference of zdz.x, scaled by the CENTRE tap's zdz.y
            float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(c_zdz.y * dist, FLT_EPS)));

            float w = w_xy * w_normal * w_depth;

            accum_col = accum_col + t_col * w;
            accum_w += w;
        }
    }

    params.out[idx.z][idx.y][idx.x][0] = accum_col.x;
    params.out[idx.z][idx.y][idx.x][1] = accum_col.y;
    params.out[idx.z][idx.y][idx.x][2] = accum_col.z;
    // Weight is floored so a later division cannot hit zero.
    params.out[idx.z][idx.y][idx.x][3] = max(accum_w, 0.0001f);
}

// Backward pass with respect to the colour input only.
// Each thread gathers, for its own texel, w * out_grad over the window, using
// the transposed weight (the depth term is scaled by the TAP's zdz.y, which is
// the centre of the corresponding forward evaluation). The result is written
// to col_grad[..., 0..2]; no gradient is produced for nrm or zdz.
__global__ void bilateral_denoiser_bwd_kernel(BilateralDenoiserParams params)
{
    uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);

    if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
        return;

    // Fetch central tap
    float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
    float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);

    float variance = params.sigma * params.sigma;
    int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;

    float3 accum_grad = make_float3(0.0f);
    for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
    {
        for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
        {
            // Compute tap coordinates, used for input activations and bilateral guides
            int32_t y = idx.y + fy;
            int32_t x = idx.x + fx;

            if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
                continue;

            // Fetch current tap. The tap colour is not needed: the gradient
            // with respect to colour depends only on the weights.
            float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
            float2 t_zdz = fetch2(params.zdz, idx.z, y, x);

            /////////////////////////////////////////////////////////
            // Compute bilateral weight
            /////////////////////////////////////////////////////////

            // Distance, transposing fx & fy doesn't affect distance
            float dist_sqr = fx * fx + fy * fy;
            float dist = sqrtf(dist_sqr);
            float w_xy = expf(-dist_sqr / (2.0f * variance));

            // Normal, transpose c_ and t_ (it's symmetric so doesn't matter)
            float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);

            // Depth, transpose c_ and t_ (matters for the denominator)
            float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(t_zdz.y * dist, FLT_EPS)));

            float w = w_xy * w_normal * w_depth;

            float3 t_col_grad = w * fetch3(params.out_grad, idx.z, y, x);
            accum_grad += t_col_grad;
        }
    }

    params.col_grad[idx.z][idx.y][idx.x][0] = accum_grad.x;
    params.col_grad[idx.z][idx.y][idx.x][1] = accum_grad.y;
    params.col_grad[idx.z][idx.y][idx.x][2] = accum_grad.z;
}
#pragma once
#include "accessor.h"

// Argument block passed by value to the bilateral denoiser kernels in
// denoising.cu.
// NOTE(review): the PackedTensorAccessor32 members carry no template
// arguments here; they appear to have been lost - confirm element type and
// rank against the original header.
struct BilateralDenoiserParams
{
    PackedTensorAccessor32 col;      // input colour; its first three sizes bound the thread grid, read as 3 channels
    PackedTensorAccessor32 col_grad; // written by the backward kernel, channels 0..2
    PackedTensorAccessor32 nrm;      // guide: 3-channel normal, compared by dot product
    PackedTensorAccessor32 zdz;      // guide: 2 channels; .x is differenced between taps, .y scales the tolerance
    PackedTensorAccessor32 out;      // written by the forward kernel: weighted colour sum (0..2) and total weight (3)
    PackedTensorAccessor32 out_grad; // read by the backward kernel as 3 channels
    float sigma;                     // spatial std-dev; also sets the window radius 2*ceil(2.5*sigma)+1
};
#include "../accessor.h"

// Parameter block for the environment-sampling program.
// NOTE(review): presumably the launch parameters of the OptiX env-sampling
// pipeline - confirm against the code that fills it. The
// PackedTensorAccessor32 members carry no template arguments here; they
// appear to have been lost - confirm element type and rank against the
// original header.
// Members ending in `_grad` sit next to the value they are named after.
struct EnvSamplingParams
{
    // Ray data
    PackedTensorAccessor32 ro;             // ray origin

    // GBuffer
    PackedTensorAccessor32 mask;
    PackedTensorAccessor32 gb_pos;
    PackedTensorAccessor32 gb_pos_grad;
    PackedTensorAccessor32 gb_normal;
    PackedTensorAccessor32 gb_normal_grad;
    PackedTensorAccessor32 gb_view_pos;
    PackedTensorAccessor32 gb_kd;
    PackedTensorAccessor32 gb_kd_grad;
    PackedTensorAccessor32 gb_ks;
    PackedTensorAccessor32 gb_ks_grad;

    // Light
    PackedTensorAccessor32 light;
    PackedTensorAccessor32 light_grad;
    PackedTensorAccessor32 pdf;            // light pdf
    PackedTensorAccessor32 rows;           // light sampling cdf
    PackedTensorAccessor32 cols;           // light sampling cdf

    // Output
    PackedTensorAccessor32 diff;
    PackedTensorAccessor32 diff_grad;
    PackedTensorAccessor32 spec;
    PackedTensorAccessor32 spec_grad;

    // Table with random permutations for stratified sampling
    PackedTensorAccessor32 perms;

    OptixTraversableHandle handle;
    unsigned int BSDF;
    unsigned int n_samples_x;
    unsigned int rnd_seed;
    unsigned int backward;
    float shadow_scale;
};
#pragma once

// NOTE(review): both include targets are missing in this copy (they appear to
// have been lost); the declarations below need the OptiX types and
// std::string - restore the header names from the original file.
#include
#include

//------------------------------------------------------------------------
// Python OptiX state wrapper.

// Plain aggregate of the OptiX handles owned by one wrapper instance.
struct OptiXState
{
    OptixDeviceContext context;
    OptixTraversableHandle gas_handle;
    CUdeviceptr d_gas_output_buffer;

    // Differentiable env sampling
    OptixPipeline pipelineEnvSampling;
    OptixShaderBindingTable sbtEnvSampling;
    OptixModule moduleEnvSampling;
};


// Owner of an OptiXState; construction and destruction are defined elsewhere.
class OptiXStateWrapper
{
public:
    OptiXStateWrapper     (const std::string &path, const std::string &cuda_path);
    ~OptiXStateWrapper    (void);

    OptiXState*           pState;   // raw pointer to the wrapped state
};
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /// @file 23 | /// @author NVIDIA Corporation 24 | /// @brief OptiX public API header 25 | /// 26 | /// Includes the host api if compiling host code, includes the cuda api if compiling device code. 27 | /// For the math library routines include optix_math.h 28 | 29 | #ifndef __optix_optix_h__ 30 | #define __optix_optix_h__ 31 | 32 | /// The OptiX version. 33 | /// 34 | /// - major = OPTIX_VERSION/10000 35 | /// - minor = (OPTIX_VERSION%10000)/100 36 | /// - micro = OPTIX_VERSION%100 37 | #define OPTIX_VERSION 70300 38 | 39 | 40 | #ifdef __CUDACC__ 41 | #include "optix_device.h" 42 | #else 43 | #include "optix_host.h" 44 | #endif 45 | 46 | 47 | #endif // __optix_optix_h__ 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_device.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_device.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host/Device side 28 | */ 29 | 30 | /******************************************************************************\ 31 | * optix_cuda.h 32 | * 33 | * This file provides the nvcc interface for generating PTX that the OptiX is 34 | * capable of parsing and weaving into the final kernel. This is included by 35 | * optix.h automatically if compiling device code. It can be included explicitly 36 | * in host code if desired. 
37 | * 38 | \******************************************************************************/ 39 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 40 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 41 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 42 | #endif 43 | #include "optix_7_device.h" 44 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ ) 45 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 46 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ 47 | #endif 48 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_function_table_definition.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. 
IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /// @file 22 | /// @author NVIDIA Corporation 23 | /// @brief OptiX public API header 24 | 25 | #ifndef __optix_optix_function_table_definition_h__ 26 | #define __optix_optix_function_table_definition_h__ 27 | 28 | #include "optix_function_table.h" 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | /** \addtogroup optix_function_table 35 | @{ 36 | */ 37 | 38 | /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly 39 | /// one translation unit. This can be achieved by including this header file in that translation 40 | /// unit. 41 | OptixFunctionTable g_optixFunctionTable; 42 | 43 | /*@}*/ // end group optix_function_table 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | 49 | #endif // __optix_optix_function_table_definition_h__ 50 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_host.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 4 | * 5 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 6 | * rights in and to this software, related documentation and any modifications thereto. 7 | * Any use, reproduction, disclosure or distribution of this software and related 8 | * documentation without an express license agreement from NVIDIA Corporation is strictly 9 | * prohibited. 
10 | * 11 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 12 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 13 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 15 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 16 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 17 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 18 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 19 | * SUCH DAMAGES 20 | */ 21 | 22 | /** 23 | * @file optix_host.h 24 | * @author NVIDIA Corporation 25 | * @brief OptiX public API 26 | * 27 | * OptiX public API Reference - Host side 28 | */ 29 | 30 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 31 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 32 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 33 | #endif 34 | #include "optix_7_host.h" 35 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ ) 36 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 37 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ 38 | #endif 39 | -------------------------------------------------------------------------------- /render/optixutils/include/optix_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and proprietary 5 | * rights in and to this software, related documentation and any modifications thereto. 
6 | * Any use, reproduction, disclosure or distribution of this software and related 7 | * documentation without an express license agreement from NVIDIA Corporation is strictly 8 | * prohibited. 9 | * 10 | * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS* 11 | * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, 12 | * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 13 | * PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY 14 | * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT 15 | * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF 16 | * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR 17 | * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF 18 | * SUCH DAMAGES 19 | */ 20 | 21 | /** 22 | * @file optix_types.h 23 | * @author NVIDIA Corporation 24 | * @brief OptiX public API header 25 | * 26 | */ 27 | 28 | #ifndef __optix_optix_types_h__ 29 | #define __optix_optix_types_h__ 30 | 31 | // clang-format off 32 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__) 33 | # define __OPTIX_INCLUDE_INTERNAL_HEADERS__ 34 | # define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 35 | #endif 36 | #include "optix_7_types.h" 37 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ ) 38 | # undef __OPTIX_INCLUDE_INTERNAL_HEADERS__ 39 | # undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ 40 | #endif 41 | // clang-format on 42 | 43 | #endif // #ifndef __optix_optix_types_h__ 44 | -------------------------------------------------------------------------------- /render/optixutils/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import os 11 | import sys 12 | import torch 13 | import torch.utils.cpp_extension 14 | 15 | #---------------------------------------------------------------------------- 16 | # C++/Cuda plugin compiler/loader. 17 | 18 | _plugin = None 19 | if _plugin is None: 20 | 21 | # Make sure we can find the necessary compiler and libary binaries. 22 | if os.name == 'nt': 23 | optix_include_dir = os.path.dirname(__file__) + r"\include" 24 | 25 | def find_cl_path(): 26 | import glob 27 | for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']: 28 | vs_editions = glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) \ 29 | + glob.glob(r"C:\Program Files\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) 30 | paths = sorted(vs_editions, reverse=True) 31 | if paths: 32 | return paths[0] 33 | 34 | # If cl.exe is not on path, try to find it. 35 | if os.system("where cl.exe >nul 2>nul") != 0: 36 | cl_path = find_cl_path() 37 | if cl_path is None: 38 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 39 | os.environ['PATH'] += ';' + cl_path 40 | 41 | elif os.name == 'posix': 42 | optix_include_dir = os.path.dirname(__file__) + r"/include" 43 | 44 | include_paths = [optix_include_dir] 45 | 46 | # Compiler options. 47 | opts = ['-DNVDR_TORCH'] 48 | 49 | # Linker options. 50 | if os.name == 'posix': 51 | ldflags = ['-lcuda', '-lnvrtc'] 52 | elif os.name == 'nt': 53 | ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib'] 54 | 55 | # List of sources. 
class _optix_env_shade_func(torch.autograd.Function):
    """Autograd bridge to the plugin's ``env_shade_fwd`` / ``env_shade_bwd`` entry points.

    ``forward`` returns the ``(diff, spec)`` pair produced by the plugin.
    ``backward`` returns gradients only for ``gb_pos``, ``gb_normal``,
    ``gb_kd``, ``gb_ks`` and ``light``; every other forward argument gets
    ``None``.
    """

    # Permutation tables shared by all calls, keyed by ``n_samples_x``.
    _random_perm = {}

    @staticmethod
    def forward(ctx, optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF, n_samples_x, rnd_seed, shadow_scale):
        """Run the forward shading kernel and stash everything ``backward`` needs."""
        # A missing seed means "draw a fresh one for this call".
        _rnd_seed = np.random.randint(2**31) if rnd_seed is None else rnd_seed
        if n_samples_x not in _optix_env_shade_func._random_perm:
            # Generate (32k) tables with random permutations to decorrelate the BSDF and light stratified samples
            _optix_env_shade_func._random_perm[n_samples_x] = torch.argsort(torch.rand(32768, n_samples_x * n_samples_x, device="cuda"), dim=-1).int()

        diff, spec = _plugin.env_shade_fwd(optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[n_samples_x], BSDF, n_samples_x, _rnd_seed, shadow_scale)
        ctx.save_for_backward(mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols)
        ctx.optix_ctx = optix_ctx
        ctx.BSDF = BSDF
        ctx.n_samples_x = n_samples_x
        # The caller's seed is stored, not the resolved one: with rnd_seed=None
        # the backward pass draws its own seed.
        # NOTE(review): forward and backward then use different random seeds --
        # presumably intentional decorrelation; confirm before changing.
        ctx.rnd_seed = rnd_seed
        ctx.shadow_scale = shadow_scale
        return diff, spec

    @staticmethod
    def backward(ctx, diff_grad, spec_grad):
        """Run the backward shading kernel for the incoming ``diff``/``spec`` gradients."""
        optix_ctx = ctx.optix_ctx
        _rnd_seed = np.random.randint(2**31) if ctx.rnd_seed is None else ctx.rnd_seed
        # ``saved_tensors`` is the supported accessor; ``saved_variables`` is deprecated.
        mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols = ctx.saved_tensors
        gb_pos_grad, gb_normal_grad, gb_kd_grad, gb_ks_grad, light_grad = _plugin.env_shade_bwd(
            optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[ctx.n_samples_x],
            ctx.BSDF, ctx.n_samples_x, _rnd_seed, ctx.shadow_scale, diff_grad, spec_grad)
        # One entry per forward argument, in forward's argument order.
        return None, None, None, gb_pos_grad, gb_normal_grad, None, gb_kd_grad, gb_ks_grad, light_grad, None, None, None, None, None, None, None


class _bilateral_denoiser_func(torch.autograd.Function):
    """Autograd bridge to the plugin's ``bilateral_denoiser_fwd`` / ``_bwd`` entry points.

    Only ``col`` receives a gradient; ``nrm``, ``zdz`` and ``sigma`` get ``None``.
    """

    @staticmethod
    def forward(ctx, col, nrm, zdz, sigma):
        """Return the plugin's forward output for ``(col, nrm, zdz, sigma)``."""
        ctx.save_for_backward(col, nrm, zdz)
        ctx.sigma = sigma
        out = _plugin.bilateral_denoiser_fwd(col, nrm, zdz, sigma)
        return out

    @staticmethod
    def backward(ctx, out_grad):
        """Return the gradient with respect to ``col`` computed by the plugin."""
        # ``saved_tensors`` is the supported accessor; ``saved_variables`` is deprecated.
        col, nrm, zdz = ctx.saved_tensors
        col_grad = _plugin.bilateral_denoiser_bwd(col, nrm, zdz, ctx.sigma, out_grad)
        return col_grad, None, None, None
def optix_build_bvh(optix_ctx, verts, tris, rebuild):
    """Hand a triangle mesh to the plugin's ``optix_build_bvh`` entry point.

    ``verts`` and ``tris`` are passed as ``(-1, 3)`` views. Empty meshes are
    deliberately not rejected here (the original author notes that msdf
    supervision may still apply, and that this should be cleaned up later).
    The checks left disabled were:
        tris.shape[0] > 0  / verts.shape[0] > 0,
        "Got empty training triangle mesh (unrecoverable discontinuity)"
    """
    wrapper = optix_ctx.cpp_wrapper
    flat_verts = verts.view(-1, 3)
    flat_tris = tris.view(-1, 3)
    _plugin.optix_build_bvh(wrapper, flat_verts, flat_tris, rebuild)

def optix_env_shade(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF='pbr', n_samples_x=8, rnd_seed=None, shadow_scale=1.0):
    """Shade through ``_optix_env_shade_func`` and return its result.

    ``BSDF`` is one of ``'pbr'``, ``'diffuse'`` or ``'white'``; any other
    value raises ``ValueError`` from the list lookup.
    """
    # Ordering important, must match the order of the fwd/bwdPbrBSDF kernel.
    bsdf_order = ['pbr', 'diffuse', 'white']
    iBSDF = bsdf_order.index(BSDF)
    shade_args = (
        optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks,
        light, pdf, rows, cols, iBSDF, n_samples_x, rnd_seed, shadow_scale,
    )
    return _optix_env_shade_func.apply(*shade_args)

def bilateral_denoiser(col, nrm, zdz, sigma):
    """Run the bilateral denoiser and divide channels 0..2 by channel 3 of its output."""
    filtered = _bilateral_denoiser_func.apply(col, nrm, zdz, sigma)
    weighted_rgb = filtered[..., 0:3]
    weight = filtered[..., 3:4]
    return weighted_rgb / weight
def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
    """Return the norm of ``x`` along its last axis, keeping that axis with size 1."""
    squared = dot(x, x)
    # Clamp before the square root: grad(sqrt(0)) would otherwise be NaN.
    return squared.clamp(min=eps).sqrt()

def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
    """Return ``x`` divided by its ``eps``-clamped last-axis norm."""
    norm = length(x, eps)
    return x / norm

def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Return the dot product over the last axis, keeping that axis with size 1."""
    return (x * y).sum(-1, keepdim=True)
def relative_loss(name, ref, cuda):
    """Print ``name`` followed by the largest element-wise relative error.

    The error is ``|ref - cuda| / ref`` where ``ref > 1e-7``; elsewhere the
    denominator is 1, so the absolute error is reported for those elements.
    """
    ref = ref.float()
    cuda = cuda.float()
    denom = torch.where(ref > 1e-7, ref, torch.ones_like(ref))
    relative = torch.abs(ref - cuda) / denom
    print(name, torch.max(relative).item())


def test_filter():
    """Compare the Python reference denoiser against the plugin denoiser.

    Builds one random 11-channel image (col 0:3, nrm 3:6, kd 6:9, zdz 9:11),
    runs forward + backward through both implementations against the same
    random target, prints the elapsed time of each, then prints the maximum
    relative error of the outputs and of the input gradients.
    """
    img_cuda = torch.rand(1, RES, RES, 11, dtype=DTYPE, device='cuda')
    img_cuda[..., 3:6] = safe_normalize(img_cuda[..., 3:6])
    # Two independent leaves holding the same data, one per implementation.
    img_ref = img_cuda.clone().detach().requires_grad_(True)
    img_cuda = img_cuda.clone().detach().requires_grad_(True)
    target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    target_ref = target_cuda.clone().detach().requires_grad_(True)

    SIGMA = 2.0

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    start.record()
    denoiser = BilateralDenoiser(sigma=SIGMA)
    denoised_ref = denoiser.forward(img_ref)
    ref_loss = torch.nn.MSELoss()(denoised_ref, target_ref)
    ref_loss.backward()
    end.record()
    torch.cuda.synchronize()
    print("Python:", start.elapsed_time(end))

    start.record()
    # The ops module defines bilateral_denoiser(col, nrm, zdz, sigma) and no
    # `svgf`; the kd channels (6:9) are therefore not passed.
    # NOTE(review): assumes the optixutils package re-exports
    # bilateral_denoiser from ops -- confirm in optixutils/__init__.py.
    denoised_cuda = ou.bilateral_denoiser(img_cuda[..., 0:3], img_cuda[..., 3:6], img_cuda[..., 9:11], SIGMA)
    cuda_loss = torch.nn.MSELoss()(denoised_cuda, target_cuda)
    cuda_loss.backward()
    end.record()
    torch.cuda.synchronize()
    print("CUDA:", start.elapsed_time(end))

    print("-------------------------------------------------------------")
    print("    Filter loss:")
    print("-------------------------------------------------------------")

    relative_loss("denoised:", denoised_ref[..., 0:3], denoised_cuda[..., 0:3])
    relative_loss("grad:", img_ref.grad[..., 0:3], img_cuda.grad[..., 0:3])
def luma(x):
    """Return the mean of channels 0..2, tiled three times along the last axis.

    The tiling is ``repeat(1, 1, 1, 3)``, i.e. it is written for 4-D input.
    """
    red, green, blue = x[..., 0:1], x[..., 1:2], x[..., 2:3]
    mean_rgb = (red + green + blue) / 3
    return mean_rgb.repeat(1, 1, 1, 3)
def value(x):
    """Return the maximum of channels 0..2, tiled three times along the last axis."""
    peak = torch.max(x[..., 0:3], dim=-1, keepdim=True)[0]
    return peak.repeat(1, 1, 1, 3)

def chroma_loss(kd, color_ref, lambda_chroma):
    """Return the weighted mean absolute chroma difference between ``kd`` and ``color_ref``.

    Chroma is RGB divided by ``value`` (clamped from below at 0.001). The
    difference is multiplied by ``color_ref[..., 3:]`` before averaging, and
    the mean is scaled by ``lambda_chroma``.
    """
    eps = 0.001
    ref_value = torch.clip(value(color_ref), min=eps)
    ref_chroma = color_ref[..., 0:3] / ref_value
    opt_value = torch.clip(value(kd), min=eps)
    opt_chroma = kd[..., 0:3] / opt_value
    weighted_diff = (opt_chroma - ref_chroma) * color_ref[..., 3:]
    return torch.mean(torch.abs(weighted_diff)) * lambda_chroma

# Diffuse luma regularizer + specular
def shading_loss(diffuse_light, specular_light, color_ref, lambda_diffuse, lambda_specular):
    """Return the shading regularizer: a luma-matching term plus a specular/diffuse ratio term.

    The first term compares log-encoded, ``util.rgb_to_srgb``-mapped total
    luma against the reference ``value``, both multiplied by
    ``color_ref[..., 3:]``. The second term is mean specular luma divided by
    mean diffuse luma (clamped from below at 0.001).
    """
    eps = 0.001
    diffuse_luma = luma(diffuse_light)
    specular_luma = luma(specular_light)
    ref_luma = value(color_ref)
    weight = color_ref[..., 3:]

    shaded = torch.clamp((diffuse_luma + specular_luma) * weight, min=0, max=65535)
    img = util.rgb_to_srgb(torch.log(shaded + 1))
    reference = torch.clamp(ref_luma * weight, min=0, max=65535)
    target = util.rgb_to_srgb(torch.log(reference + 1))

    # Plain absolute error (the original version in the paper). An alternative
    # that scales the error by diffuse_luma / clamp(diffuse_luma + specular_luma, min=eps)
    # to encourage the specular component to take control is left disabled.
    error = torch.abs(img - target)
    loss = torch.mean(error) * lambda_diffuse
    specular_ratio = torch.mean(specular_luma) / torch.clamp(torch.mean(diffuse_luma), min=eps)
    loss += specular_ratio * lambda_specular
    return loss
######################################################################################
# Computes the image gradient, useful for kd/ks smoothness losses
######################################################################################
def image_grad(buf, std=0.01):
    """Return a stochastic absolute difference between ``buf`` and a jittered tap of itself.

    A per-pixel coordinate grid is perturbed with Gaussian noise of standard
    deviation ``std`` and used to sample ``buf`` through ``dr.texture``. The
    result is ``|tap - buf|`` over all channels but the last, multiplied by
    the last channel of both the tap and ``buf``.

    All helper tensors are created on ``buf.device`` so that inputs living on
    a non-default GPU do not hit a device mismatch.
    """
    device = buf.device
    height, width = buf.shape[1], buf.shape[2]
    # NOTE(review): the grid spans (-1, 1); confirm this matches the coordinate
    # range dr.texture expects for boundary_mode='clamp'.
    t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / height, 1.0 - 1.0 / height, height, device=device),
                          torch.linspace(-1.0 + 1.0 / width, 1.0 - 1.0 / width, width, device=device),
                          indexing='ij')
    jitter = torch.normal(mean=0, std=std, size=(buf.shape[0], height, width, 2), device=device)
    tc = jitter + torch.stack((s, t), dim=-1)[None, ...]
    tap = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp')
    return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:]

######################################################################################
# Computes the average edge length of a mesh.
# Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients
######################################################################################
def avg_edge_length(v_pos, t_pos_idx):
    """Return the mean length of the edges ``mesh.compute_edges`` derives from ``t_pos_idx``."""
    e_pos_idx = mesh.compute_edges(t_pos_idx)
    edge_start = v_pos[e_pos_idx[:, 0]]
    edge_end = v_pos[e_pos_idx[:, 1]]
    edge_len = util.length(edge_start - edge_end)
    return torch.mean(edge_len)
def laplace_regularizer_const(v_pos, t_pos_idx):
    """Return the mean squared uniform ("umbrella") Laplacian of the vertex positions.

    Laplacian regularization using the umbrella operator (Fujiwara / Desbrun),
    see https://mgarland.org/class/geom04/material/smoothing.pdf

    For every triangle, each corner accumulates the two edge vectors that
    point to the other two corners, and a count of 2. The accumulated vector
    is divided by the count (clamped from below at 1 so vertices referenced by
    no triangle stay at zero), squared and averaged over all components.
    """
    term = torch.zeros_like(v_pos)
    norm = torch.zeros_like(v_pos[..., 0:1])

    v0 = v_pos[t_pos_idx[:, 0], :]
    v1 = v_pos[t_pos_idx[:, 1], :]
    v2 = v_pos[t_pos_idx[:, 2], :]

    term.scatter_add_(0, t_pos_idx[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0))
    term.scatter_add_(0, t_pos_idx[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1))
    term.scatter_add_(0, t_pos_idx[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2))

    # Each triangle contributes two neighbours to each of its corners.
    two = torch.ones_like(v0) * 2.0
    norm.scatter_add_(0, t_pos_idx[:, 0:1], two)
    norm.scatter_add_(0, t_pos_idx[:, 1:2], two)
    norm.scatter_add_(0, t_pos_idx[:, 2:3], two)

    term = term / torch.clamp(norm, min=1.0)

    return torch.mean(term**2)

######################################################################################
# Smooth vertex normals
######################################################################################
def normal_consistency(v_pos, t_pos_idx):
    """Return the mean of ``(1 - n0 . n1) / 2`` over the face pairs that share an edge.

    Face pairs come from ``mesh.compute_edge_to_face_mapping``. The value is 0
    when every pair of neighbouring face normals agrees.
    """
    # Compute face normals
    v0 = v_pos[t_pos_idx[:, 0], :]
    v1 = v_pos[t_pos_idx[:, 1], :]
    v2 = v_pos[t_pos_idx[:, 2], :]

    # dim=-1 is required: without it torch.cross picks the first dimension of
    # size 3, which is the face axis for a mesh with exactly three triangles.
    face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0, dim=-1))

    tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx)

    # Fetch normals for both faces sharing an edge
    n0 = face_normals[tris_per_edge[:, 0], :]
    n1 = face_normals[tris_per_edge[:, 1], :]

    # Compute error metric based on normal difference
    term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
    term = (1.0 - term) * 0.5

    return torch.mean(torch.abs(term))
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 9 | 10 | from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith 11 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ] 12 | -------------------------------------------------------------------------------- /render/renderutils/bsdf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 
# Scale applied to dot(view, normal) in _bend_normal before clamping to [0, 1].
NORMAL_THRESHOLD = 0.1

################################################################################
# Vector utility functions
################################################################################

def _dot(x, y):
    """Return the dot product over the last axis, keeping that axis with size 1."""
    return torch.sum(x*y, -1, keepdim=True)

def _reflect(x, n):
    """Return ``2 * dot(x, n) * n - x``, the reflection of ``x`` about ``n``."""
    return 2*_dot(x, n)*n - x

def _safe_normalize(x):
    """Return ``x`` normalized along its last axis via ``torch.nn.functional.normalize``."""
    return torch.nn.functional.normalize(x, dim = -1)

def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading):
    """Blend from the geometric normal towards the smooth normal.

    The blend weight is ``dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD``
    clamped to [0, 1]: weight 0 returns ``geom_nrm``, weight 1 returns
    ``smooth_nrm``.
    """
    # Swap normal direction for backfacing surfaces
    if two_sided_shading:
        # Decided once from the unflipped geometric normal, then applied to both.
        facing = _dot(geom_nrm, view_vec) > 0
        smooth_nrm = torch.where(facing, smooth_nrm, -smooth_nrm)
        geom_nrm = torch.where(facing, geom_nrm, -geom_nrm)

    t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1)
    return torch.lerp(geom_nrm, smooth_nrm, t)


def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl):
    """Return the normalized combination of tangent, bitangent and normal.

    The three components of ``perturbed_nrm`` weight ``smooth_tng``, the
    bitangent ``normalize(cross(smooth_tng, smooth_nrm))`` and ``smooth_nrm``
    (the last weight clamped to be non-negative). With ``opengl`` set, the
    bitangent contribution is subtracted instead of added.
    """
    # dim=-1 is required: without it torch.cross picks the first dimension of
    # size 3, which is wrong whenever a leading dimension (e.g. batch) is 3.
    smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm, dim=-1))
    if opengl:
        shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
    else:
        shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
    return _safe_normalize(shading_nrm)

def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
    """Return the shading normal: perturbed in the tangent frame, then bent towards ``geom_nrm``.

    ``smooth_nrm`` and ``smooth_tng`` are normalized first; the view vector is
    ``normalize(view_pos - pos)``.
    """
    smooth_nrm = _safe_normalize(smooth_nrm)
    smooth_tng = _safe_normalize(smooth_tng)
    view_vec = _safe_normalize(view_pos - pos)
    shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl)
    return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading)
################################################################################
# Simple lambertian diffuse BSDF
################################################################################

def bsdf_lambert(nrm, wi):
    """Return ``max(dot(nrm, wi), 0) / pi``."""
    cos_term = torch.clamp(_dot(nrm, wi), min=0.0)
    return cos_term / math.pi

################################################################################
# Frostbite diffuse
################################################################################

def bsdf_frostbite(nrm, wi, wo, linearRoughness):
    """Return the Frostbite diffuse term, zeroed unless both ``wi`` and ``wo`` face ``nrm``.

    Two ``bsdf_fresnel_shlick`` scatter factors (one per direction, with
    f0 = 1 and a roughness-dependent f90) are multiplied together and scaled
    by an energy factor that also depends on ``linearRoughness``.
    """
    cos_i = _dot(wi, nrm)
    cos_o = _dot(wo, nrm)

    half_vec = _safe_normalize(wo + wi)
    cos_ih = _dot(wi, half_vec)

    f0 = 1.0
    energyBias = 0.5 * linearRoughness
    f90 = energyBias + 2.0 * cos_ih * cos_ih * linearRoughness
    energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness

    scatter_i = bsdf_fresnel_shlick(f0, f90, cos_i)
    scatter_o = bsdf_fresnel_shlick(f0, f90, cos_o)
    res = scatter_i * scatter_o * energyFactor

    both_front = (cos_i > 0.0) & (cos_o > 0.0)
    return torch.where(both_front, res, torch.zeros_like(res))

################################################################################
# Phong specular, loosely based on mitsuba implementation
################################################################################

def bsdf_phong(nrm, wo, wi, N):
    """Return the Phong lobe with exponent ``N``, including the ``dot(nrm, wi)`` factor.

    Both cosines are clamped to [0, 1]; the lobe is scaled by
    ``(N + 2) / (2 * pi)``.
    """
    mirror_dir = _reflect(wo, nrm)
    dp_r = torch.clamp(_dot(mirror_dir, wi), min=0.0, max=1.0)
    dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0)
    lobe = dp_r ** N
    return lobe * dp_l * (N + 2) / (2 * math.pi)
f0) * (1.0 - _cosTheta) ** 5.0 99 | 100 | def bsdf_ndf_ggx(alphaSqr, cosTheta): 101 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 102 | d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1 103 | return alphaSqr / (d * d * math.pi) 104 | 105 | def bsdf_lambda_ggx(alphaSqr, cosTheta): 106 | _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon) 107 | cosThetaSqr = _cosTheta * _cosTheta 108 | tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr 109 | res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0) 110 | return res 111 | 112 | def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO): 113 | lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI) 114 | lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO) 115 | return 1 / (1 + lambdaI + lambdaO) 116 | 117 | def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08): 118 | _alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0) 119 | alphaSqr = _alpha * _alpha 120 | 121 | h = _safe_normalize(wo + wi) 122 | woDotN = _dot(wo, nrm) 123 | wiDotN = _dot(wi, nrm) 124 | woDotH = _dot(wo, h) 125 | nDotH = _dot(nrm, h) 126 | 127 | D = bsdf_ndf_ggx(alphaSqr, nDotH) 128 | G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN) 129 | F = bsdf_fresnel_shlick(col, 1, woDotH) 130 | 131 | w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon) 132 | 133 | frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon) 134 | return torch.where(frontfacing, w, torch.zeros_like(w)) 135 | 136 | def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF): 137 | wo = _safe_normalize(view_pos - pos) 138 | wi = _safe_normalize(light_pos - pos) 139 | 140 | spec_str = arm[..., 0:1] # x component 141 | roughness = arm[..., 1:2] # y component 142 | metallic = arm[..., 2:3] # z component 143 | ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str) 144 | kd = kd * (1.0 - metallic) 145 | 146 | if BSDF == 0: 147 
| diffuse = kd * bsdf_lambert(nrm, wi) 148 | else: 149 | diffuse = kd * bsdf_frostbite(nrm, wi, wo, roughness) 150 | specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness) 151 | return diffuse + specular 152 | -------------------------------------------------------------------------------- /render/renderutils/c_src/bsdf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | 14 | #include "common.h" 15 | 16 | struct LambertKernelParams 17 | { 18 | Tensor nrm; 19 | Tensor wi; 20 | Tensor out; 21 | dim3 gridSize; 22 | }; 23 | 24 | struct FrostbiteDiffuseKernelParams 25 | { 26 | Tensor nrm; 27 | Tensor wi; 28 | Tensor wo; 29 | Tensor linearRoughness; 30 | Tensor out; 31 | dim3 gridSize; 32 | }; 33 | 34 | struct FresnelShlickKernelParams 35 | { 36 | Tensor f0; 37 | Tensor f90; 38 | Tensor cosTheta; 39 | Tensor out; 40 | dim3 gridSize; 41 | }; 42 | 43 | struct NdfGGXParams 44 | { 45 | Tensor alphaSqr; 46 | Tensor cosTheta; 47 | Tensor out; 48 | dim3 gridSize; 49 | }; 50 | 51 | struct MaskingSmithParams 52 | { 53 | Tensor alphaSqr; 54 | Tensor cosThetaI; 55 | Tensor cosThetaO; 56 | Tensor out; 57 | dim3 gridSize; 58 | }; 59 | 60 | struct PbrSpecular 61 | { 62 | Tensor col; 63 | Tensor nrm; 64 | Tensor wo; 65 | Tensor wi; 66 | Tensor alpha; 67 | Tensor out; 68 | dim3 gridSize; 69 | float min_roughness; 70 | }; 71 | 72 | struct PbrBSDF 73 | { 74 | Tensor kd; 75 | 
Tensor arm; 76 | Tensor pos; 77 | Tensor nrm; 78 | Tensor view_pos; 79 | Tensor light_pos; 80 | Tensor out; 81 | dim3 gridSize; 82 | float min_roughness; 83 | int BSDF; 84 | }; 85 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #include 13 | #include 14 | 15 | //------------------------------------------------------------------------ 16 | // Block and grid size calculators for kernel launches. 17 | 18 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims) 19 | { 20 | int maxThreads = maxWidth * maxHeight; 21 | if (maxThreads <= 1 || (dims.x * dims.y) <= 1) 22 | return dim3(1, 1, 1); // Degenerate. 23 | 24 | // Start from max size. 25 | int bw = maxWidth; 26 | int bh = maxHeight; 27 | 28 | // Optimizations for weirdly sized buffers. 29 | if (dims.x < bw) 30 | { 31 | // Decrease block width to smallest power of two that covers the buffer width. 32 | while ((bw >> 1) >= dims.x) 33 | bw >>= 1; 34 | 35 | // Maximize height. 36 | bh = maxThreads / bw; 37 | if (bh > dims.y) 38 | bh = dims.y; 39 | } 40 | else if (dims.y < bh) 41 | { 42 | // Halve height and double width until fits completely inside buffer vertically. 43 | while (bh > dims.y) 44 | { 45 | bh >>= 1; 46 | if (bw < dims.x) 47 | bw <<= 1; 48 | } 49 | } 50 | 51 | // Done. 
52 | return dim3(bw, bh, 1); 53 | } 54 | 55 | // returns the size of a block that can be reduced using horizontal SIMD operations (e.g. __shfl_xor_sync) 56 | dim3 getWarpSize(dim3 blockSize) 57 | { 58 | return dim3( 59 | std::min(blockSize.x, 32u), 60 | std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y)), 61 | std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z)) 62 | ); 63 | } 64 | 65 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims) 66 | { 67 | dim3 gridSize; 68 | gridSize.x = (dims.x - 1) / blockSize.x + 1; 69 | gridSize.y = (dims.y - 1) / blockSize.y + 1; 70 | gridSize.z = (dims.z - 1) / blockSize.z + 1; 71 | return gridSize; 72 | } 73 | 74 | //------------------------------------------------------------------------ 75 | -------------------------------------------------------------------------------- /render/renderutils/c_src/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | 16 | #include "vec3f.h" 17 | #include "vec4f.h" 18 | #include "tensor.h" 19 | 20 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims); 21 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims); 22 | 23 | #ifdef __CUDACC__ 24 | 25 | #ifdef _MSC_VER 26 | #define M_PI 3.14159265358979323846f 27 | #endif 28 | 29 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize) 30 | { 31 | return dim3( 32 | min(blockSize.x, 32u), 33 | min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)), 34 | min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z)) 35 | ); 36 | } 37 | 38 | __device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); } 39 | #else 40 | dim3 getWarpSize(dim3 blockSize); 41 | #endif -------------------------------------------------------------------------------- /render/renderutils/c_src/cubemap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
// File: /render/renderutils/c_src/cubemap.h
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#pragma once

#include "common.h"

// Parameter blocks for the cubemap kernels. NOTE(review): the kernels
// themselves are not part of this excerpt; field order and types are kept
// exactly as found because host and device code presumably share them.

struct DiffuseCubemapKernelParams
{
    Tensor      cubemap;
    Tensor      out;
    dim3        gridSize;
};

struct SpecularCubemapKernelParams
{
    Tensor      cubemap;
    Tensor      bounds;
    Tensor      out;
    dim3        gridSize;
    float       costheta_cutoff;
    float       roughness;
};

struct SpecularBoundsKernelParams
{
    float       costheta_cutoff;
    Tensor      out;
    dim3        gridSize;
};

// File: /render/renderutils/c_src/loss.cu
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

// NOTE(review): the header name on this #include line was missing in this
// copy of the file; <cuda.h> is restored from the upstream nvdiffrec
// sources - confirm.
#include <cuda.h>

#include "common.h"
#include "loss.h"

//------------------------------------------------------------------------
// Utils

// Derivative of |x|: sign(x), with 0 at x == 0.
__device__ inline float bwdAbs(float x) { return x == 0.0f ? 0.0f : x < 0.0f ? -1.0f : 1.0f; }

// Butterfly reduction over the 32 lanes of a warp; every lane ends up with
// the sum. All 32 lanes must call this (the shuffle mask is 0xFFFFFFFF).
__device__ float warpSum(float val) {
    for (int i = 1; i < 32; i *= 2)
        val += __shfl_xor_sync(0xFFFFFFFF, val, i);
    return val;
}

//------------------------------------------------------------------------
// Tonemapping

// Linear -> sRGB transfer function; negative inputs map to 0.
__device__ inline float fwdSRGB(float x)
{
    return x > 0.0031308f ? powf(max(x, 0.0031308f), 1.0f / 2.4f) * 1.055f - 0.055f : 12.92f * max(x, 0.0f);
}

// Accumulate d(fwdSRGB)/dx * d_out into d_x.
// 0.439583 = 1.055 / 2.4 and 0.583333 = 1 - 1 / 2.4.
__device__ inline void bwdSRGB(float x, float &d_x, float d_out)
{
    if (x > 0.0031308f)
        d_x += d_out * 0.439583f / powf(x, 0.583333f);
    else if (x > 0.0f)
        d_x += d_out * 12.92f;
}

// Per-channel sRGB(log(x + 1)).
__device__ inline vec3f fwdTonemapLogSRGB(vec3f x)
{
    return vec3f(fwdSRGB(logf(x.x + 1.0f)), fwdSRGB(logf(x.y + 1.0f)), fwdSRGB(logf(x.z + 1.0f)));
}

// Gradient of fwdTonemapLogSRGB. Channels outside (0, 65535) receive no
// gradient. The in-place multiply by 1 / (x + 1) is the log() chain-rule
// factor, so d_x is expected to be zero on entry.
__device__ inline void bwdTonemapLogSRGB(vec3f x, vec3f& d_x, vec3f d_out)
{
    if (x.x > 0.0f && x.x < 65535.0f)
    {
        bwdSRGB(logf(x.x + 1.0f), d_x.x, d_out.x);
        d_x.x *= 1 / (x.x + 1.0f);
    }
    if (x.y > 0.0f && x.y < 65535.0f)
    {
        bwdSRGB(logf(x.y + 1.0f), d_x.y, d_out.y);
        d_x.y *= 1 / (x.y + 1.0f);
    }
    if (x.z > 0.0f && x.z < 65535.0f)
    {
        bwdSRGB(logf(x.z + 1.0f), d_x.z, d_out.z);
        d_x.z *= 1 / (x.z + 1.0f);
    }
}

// Relative MSE: (img - target)^2 / (img^2 + target^2 + eps).
__device__ inline float fwdRELMSE(float img, float target, float eps = 0.1f)
{
    return (img - target) * (img - target) / (img * img + target * target + eps);
}

// Accumulate the gradients of fwdRELMSE into d_img and d_target.
__device__ inline void bwdRELMSE(float img, float target, float &d_img, float &d_target, float d_out, float eps = 0.1f)
{
    float denom = (target * target + img * img + eps);
    d_img += d_out * 2 * (img - target) * (target * (target + img) + eps) / (denom * denom);
    d_target -= d_out * 2 * (img - target) * (img * (target + img) + eps) / (denom * denom);
}

// SMAPE: |img - target| / (img + target + eps).
__device__ inline float fwdSMAPE(float img, float target, float eps=0.01f)
{
    return abs(img - target) / (img + target + eps);
}

// Accumulate the gradients of fwdSMAPE into d_img and d_target.
__device__ inline void bwdSMAPE(float img, float target, float& d_img, float& d_target, float d_out, float eps = 0.01f)
{
    float denom = (target + img + eps);
    d_img += d_out * bwdAbs(img - target) * (2 * target + eps) / (denom * denom);
    d_target -= d_out * bwdAbs(img - target) * (2 * img + eps) / (denom * denom);
}

//------------------------------------------------------------------------
// Kernels

// Forward pass: per-pixel loss (mean over the three channels), summed across
// each warp; one value per warp-sized tile is written to p.out.
__global__ void imgLossFwdKernel(LossKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;

    // Out-of-range threads keep 0 so they can still take part in warpSum().
    float floss = 0.0f;
    if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z)
    {
        vec3f img = p.img.fetch3(px, py, pz);
        vec3f target = p.target.fetch3(px, py, pz);

        img = vec3f(clamp(img.x, 0.0f, 65535.0f), clamp(img.y, 0.0f, 65535.0f), clamp(img.z, 0.0f, 65535.0f));
        target = vec3f(clamp(target.x, 0.0f, 65535.0f), clamp(target.y, 0.0f, 65535.0f), clamp(target.z, 0.0f, 65535.0f));

        if (p.tonemapper == TONEMAPPER_LOG_SRGB)
        {
            img = fwdTonemapLogSRGB(img);
            target = fwdTonemapLogSRGB(target);
        }

        vec3f vloss(0);
        if (p.loss == LOSS_MSE)
            vloss = (img - target) * (img - target);
        else if (p.loss == LOSS_RELMSE)
            vloss = vec3f(fwdRELMSE(img.x, target.x), fwdRELMSE(img.y, target.y), fwdRELMSE(img.z, target.z));
        else if (p.loss == LOSS_SMAPE)
            vloss = vec3f(fwdSMAPE(img.x, target.x), fwdSMAPE(img.y, target.y), fwdSMAPE(img.z, target.z));
        else
            vloss = vec3f(abs(img.x - target.x), abs(img.y - target.y), abs(img.z - target.z));

        floss = sum(vloss) / 3.0f;
    }

    floss = warpSum(floss);

    // Only the first thread of each warp-sized tile stores the tile's sum.
    dim3 warpSize = getWarpSize(blockDim);
    if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z && threadIdx.x % warpSize.x == 0 && threadIdx.y % warpSize.y == 0 && threadIdx.z % warpSize.z == 0)
        p.out.store(px / warpSize.x, py / warpSize.y, pz / warpSize.z, floss);
}

// Backward pass: reads the incoming gradient of the tile this pixel belongs
// to from p.out and writes per-pixel gradients for img and target.
__global__ void imgLossBwdKernel(LossKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;

    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    dim3 warpSize = getWarpSize(blockDim);

    vec3f _img = p.img.fetch3(px, py, pz);
    vec3f _target = p.target.fetch3(px, py, pz);
    float d_out = p.out.fetch1(px / warpSize.x, py / warpSize.y, pz / warpSize.z);

    /////////////////////////////////////////////////////////////////////
    // FWD

    // Recompute the forward values exactly as imgLossFwdKernel does, including
    // the clamp to [0, 65535]. Without the clamp, an out-of-range img would
    // feed a value the forward pass never used into the gradient of target
    // (and vice versa). The raw _img/_target are kept for the range tests and
    // the tonemapper gradient below.
    vec3f img = vec3f(clamp(_img.x, 0.0f, 65535.0f), clamp(_img.y, 0.0f, 65535.0f), clamp(_img.z, 0.0f, 65535.0f));
    vec3f target = vec3f(clamp(_target.x, 0.0f, 65535.0f), clamp(_target.y, 0.0f, 65535.0f), clamp(_target.z, 0.0f, 65535.0f));
    if (p.tonemapper == TONEMAPPER_LOG_SRGB)
    {
        img = fwdTonemapLogSRGB(img);
        target = fwdTonemapLogSRGB(target);
    }

    /////////////////////////////////////////////////////////////////////
    // BWD

    // floss = sum(vloss) / 3 in the forward pass.
    vec3f d_vloss = vec3f(d_out, d_out, d_out) / 3.0f;

    vec3f d_img(0), d_target(0);
    if (p.loss == LOSS_MSE)
    {
        // The z component previously used d_vloss.x; the three components are
        // equal, so the value is unchanged, but each channel now uses its own
        // upstream gradient.
        d_img = vec3f(d_vloss.x * 2 * (img.x - target.x), d_vloss.y * 2 * (img.y - target.y), d_vloss.z * 2 * (img.z - target.z));
        d_target = -d_img;
    }
    else if (p.loss == LOSS_RELMSE)
    {
        bwdRELMSE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
        bwdRELMSE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
        bwdRELMSE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
    }
    else if (p.loss == LOSS_SMAPE)
    {
        bwdSMAPE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
        bwdSMAPE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
        bwdSMAPE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
    }
    else
    {
        d_img = d_vloss * vec3f(bwdAbs(img.x - target.x), bwdAbs(img.y - target.y), bwdAbs(img.z - target.z));
        d_target = -d_img;
    }


    if (p.tonemapper == TONEMAPPER_LOG_SRGB)
    {
        vec3f d__img(0), d__target(0);
        bwdTonemapLogSRGB(_img, d__img, d_img);
        bwdTonemapLogSRGB(_target, d__target, d_target);
        d_img = d__img; d_target = d__target;
    }

    // Gradient of the forward clamp: zero wherever the input was clamped.
    if (_img.x <= 0.0f || _img.x >= 65535.0f) d_img.x = 0;
    if (_img.y <= 0.0f || _img.y >= 65535.0f) d_img.y = 0;
    if (_img.z <= 0.0f || _img.z >= 65535.0f) d_img.z = 0;
    if (_target.x <= 0.0f || _target.x >= 65535.0f) d_target.x = 0;
    if (_target.y <= 0.0f || _target.y >= 65535.0f) d_target.y = 0;
    if (_target.z <= 0.0f || _target.z >= 65535.0f) d_target.z = 0;

    p.img.store_grad(px, py, pz, d_img);
    p.target.store_grad(px, py, pz, d_target);
}
// File: /render/renderutils/c_src/loss.h
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#pragma once

#include "common.h"

// Tonemapper applied to both images before the loss is evaluated
// (see imgLossFwdKernel / imgLossBwdKernel in loss.cu).
enum TonemapperType
{
    TONEMAPPER_NONE = 0,        // compare the clamped values directly
    TONEMAPPER_LOG_SRGB = 1     // sRGB(log(x + 1)) per channel
};

// Per-channel loss selector. The kernels in loss.cu treat any value that is
// not MSE, RELMSE or SMAPE as L1.
enum LossType
{
    LOSS_L1 = 0,
    LOSS_MSE = 1,
    LOSS_RELMSE = 2,
    LOSS_SMAPE = 3
};

// Arguments of the image-loss kernels, passed by value.
struct LossKernelParams
{
    Tensor          img;        // image being compared; receives a gradient in the backward kernel
    Tensor          target;     // reference image; also receives a gradient in the backward kernel
    Tensor          out;        // forward: per-warp loss sums; backward: read as the incoming gradient
    dim3            gridSize;   // extent used for the kernels' bounds checks
    TonemapperType  tonemapper;
    LossType        loss;
};
// File: /render/renderutils/c_src/mesh.cu
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

// NOTE(review): the header names on the next two #include lines were missing
// in this copy of the file; <cuda.h> and <stdio.h> are restored from the
// upstream nvdiffrec sources - confirm.
#include <cuda.h>
#include <stdio.h>

#include "common.h"
#include "mesh.h"


//------------------------------------------------------------------------
// Kernels

// Forward pass: out = M * p for every point/vector p of batch entry pz.
// With isPoints the input is treated as (x, y, z, 1) and four components are
// written; otherwise the translation column is ignored and three components
// are written.
__global__ void xfmPointsFwdKernel(XfmKernelParams p)
{
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;

    // The first 16 threads of the block stage the 4x4 matrix in shared memory,
    // stored transposed: mtx[col][row]. NOTE(review): this relies on
    // blockDim.x >= 16; the launch configuration is not visible here.
    __shared__ float mtx[4][4];
    if (threadIdx.x < 16)
        mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
    __syncthreads();

    // Bounds check comes after the barrier so every thread reaches it.
    if (px >= p.gridSize.x)
        return;

    vec3f pos(
        p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
    );

    if (p.isPoints)
    {
        p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]);
        p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]);
        p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]);
        p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]);
    }
    else
    {
        p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]);
        p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]);
        p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]);
    }
}

// Backward pass: d_points = M^T * d_out, where d_out is read from p.out.
// Only the gradient with respect to the points is written.
__global__ void xfmPointsBwdKernel(XfmKernelParams p)
{
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;

    // Same transposed staging of the matrix as in the forward kernel.
    __shared__ float mtx[4][4];
    if (threadIdx.x < 16)
        mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
    __syncthreads();

    if (px >= p.gridSize.x)
        return;

    vec3f pos(
        p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
    );

    // The forward kernel writes a fourth component only when isPoints is set,
    // and the vector branch below never uses d_out.w. Fetch it only for
    // points, so the vector case does not read past the three components that
    // were written.
    vec4f d_out(
        p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
        p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
        p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
        p.isPoints ? p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0)) : 0.0f
    );

    if (p.isPoints)
    {
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
    }
    else
    {
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
    }
}
// File: /render/renderutils/c_src/mesh.h
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#pragma once

#include "common.h"

// Arguments of xfmPointsFwdKernel / xfmPointsBwdKernel (mesh.cu), passed by value.
struct XfmKernelParams
{
    bool    isPoints;   // true: treat inputs as points (w = 1, 4 outputs); false: vectors (3 outputs)
    Tensor  points;
    Tensor  matrix;
    Tensor  out;
    dim3    gridSize;
};

// File: /render/renderutils/c_src/normal.cu
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#include "common.h"
#include "normal.h"

// dot(view_vec, smooth_nrm) at or above this value selects the smooth normal
// entirely; below it the result blends towards the geometric normal.
#define NORMAL_THRESHOLD 0.1f

//------------------------------------------------------------------------
// Perturb shading normal by tangent frame

// Builds the shading normal from a tangent-space normal:
//   tng * n.x  +/-  bitng * n.y  +  nrm * max(n.z, 0), then normalized.
// The bitangent is normalize(cross(tng, nrm)); its sign is flipped when
// `opengl` is true.
__device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl)
{
    vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm);
    vec3f smooth_bitng = safeNormalize(_smooth_bitng);
    vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);
    return safeNormalize(_shading_nrm);
}

// Gradient of fwdPerturbNormal. Accumulates (+=) into d_perturbed_nrm,
// d_smooth_nrm and d_smooth_tng, so callers must initialize them.
__device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl)
{
    ////////////////////////////////////////////////////////////////////////
    // FWD
    // Recompute the forward intermediates needed by the chain rule.
    vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm);
    vec3f smooth_bitng = safeNormalize(_smooth_bitng);
    vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);

    ////////////////////////////////////////////////////////////////////////
    // BWD
    vec3f d_shading_nrm(0);
    bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out);

    vec3f d_smooth_bitng(0);

    // max(perturbed_nrm.z, 0) only passes gradient when z is positive.
    if (perturbed_nrm.z > 0.0f)
    {
        d_smooth_nrm += d_shading_nrm * perturbed_nrm.z;
        d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm);
    }

    d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y;
    d_perturbed_nrm.y += (opengl ? -1 : 1) * sum(d_shading_nrm * smooth_bitng);

    d_smooth_tng += d_shading_nrm * perturbed_nrm.x;
    d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng);

    // Back through the bitangent normalization, then through the cross product.
    vec3f d__smooth_bitng(0);
    bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng);

    bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng);
}

//------------------------------------------------------------------------
// NOTE(review): bent_nrm_eps is defined but not referenced anywhere in the
// code shown in this file.
#define bent_nrm_eps 0.001f

// Blends between the geometric and the smooth normal with weight
// t = clamp(dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, 0, 1).
// The result is not re-normalized here.
__device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm)
{
    float dp = dot(view_vec, smooth_nrm);
    float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);
    return geom_nrm * (1.0f - t) + smooth_nrm * t;
}

// Gradient of fwdBendNormal. Accumulates (+=) into d_view_vec, d_smooth_nrm
// and d_geom_nrm.
__device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out)
{
    ////////////////////////////////////////////////////////////////////////
    // FWD
    float dp = dot(view_vec, smooth_nrm);
    float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);

    ////////////////////////////////////////////////////////////////////////
    // BWD
    // Above the threshold t is pinned to 1, so the output is smooth_nrm only.
    if (dp > NORMAL_THRESHOLD)
        d_smooth_nrm += d_out;
    else
    {
        // geom_nrm * (1.0f - t) + smooth_nrm * t;
        d_geom_nrm += d_out * (1.0f - t);
        d_smooth_nrm += d_out * t;
        float d_t = sum(d_out * (smooth_nrm - geom_nrm));

        // The clamp passes no gradient where it saturates. Inside this branch
        // dp > NORMAL_THRESHOLD cannot occur, so only dp < 0 zeroes d_dp.
        float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD;

        bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp);
    }
}

//------------------------------------------------------------------------
// Kernels

// Forward pass, one thread per (px, py, pz) element: normalize the
// interpolated normal/tangent, apply the tangent-space perturbation, then bend
// the result towards the geometric normal at grazing view angles. With
// two_sided_shading, elements whose geometric normal faces away from the
// viewer use the negated normals.
__global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;
    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    vec3f pos = p.pos.fetch3(px, py, pz);
    vec3f view_pos = p.view_pos.fetch3(px, py, pz);
    vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
    vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
    vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
    vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);

    vec3f smooth_nrm = safeNormalize(_smooth_nrm);
    vec3f smooth_tng = safeNormalize(_smooth_tng);
    vec3f view_vec = safeNormalize(view_pos - pos);
    vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);

    vec3f res;
    if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
        res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm);
    else
        res = fwdBendNormal(view_vec, shading_nrm, geom_nrm);

    p.out.store(px, py, pz, res);
}

// Backward pass: p.out is read as the incoming gradient; gradients are
// written for pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng and
// geom_nrm.
__global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;
    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    vec3f pos = p.pos.fetch3(px, py, pz);
    vec3f view_pos = p.view_pos.fetch3(px, py, pz);
    vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
    vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
    vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
    vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
    vec3f d_out = p.out.fetch3(px, py, pz);

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    // FWD

    // Recompute the forward intermediates; the un-normalized inputs are kept
    // for the bwdSafeNormalize calls at the end.
    vec3f smooth_nrm = safeNormalize(_smooth_nrm);
    vec3f smooth_tng = safeNormalize(_smooth_tng);
    vec3f _view_vec = view_pos - pos;
    vec3f view_vec = safeNormalize(view_pos - pos);

    vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    // BWD

    vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0);
    if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
    {
        // The forward pass fed negated normals into fwdBendNormal, so the
        // gradients with respect to the original normals are negated too.
        bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
        d_shading_nrm = -d_shading_nrm;
        d_geom_nrm = -d_geom_nrm;
    }
    else
        bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);

    vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0);
    bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl);

    vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0);
    bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec);
    bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm);
    bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng);

    // view_vec = view_pos - pos, hence the opposite signs for pos and view_pos.
    p.pos.store_grad(px, py, pz, -d__view_vec);
    p.view_pos.store_grad(px, py, pz, d__view_vec);
    p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm);
    p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm);
    p.smooth_tng.store_grad(px, py, pz, d__smooth_tng);
    p.geom_nrm.store_grad(px, py, pz, d_geom_nrm);
}

// File: /render/renderutils/c_src/normal.h
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#pragma once

#include "common.h"

// Arguments of PrepareShadingNormalFwdKernel / PrepareShadingNormalBwdKernel
// (normal.cu), passed by value.
struct PrepareShadingNormalKernelParams
{
    Tensor  pos;
    Tensor  view_pos;
    Tensor  perturbed_nrm;
    Tensor  smooth_nrm;
    Tensor  smooth_tng;
    Tensor  geom_nrm;
    Tensor  out;        // forward: resulting normal; backward: read as the incoming gradient
    dim3    gridSize;   // extent used for the kernels' bounds checks
    bool    two_sided_shading, opengl;
};
Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 10 | */ 11 | 12 | #pragma once 13 | #if defined(__CUDACC__) && defined(BFLOAT16) 14 | #include // bfloat16 is float32 compatible with less mantissa bits 15 | #endif 16 | 17 | //--------------------------------------------------------------------------------- 18 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16 19 | 20 | struct Tensor 21 | { 22 | void* val; 23 | void* d_val; 24 | int dims[4], _dims[4]; 25 | int strides[4]; 26 | bool fp16; 27 | 28 | #if defined(__CUDA__) && !defined(__CUDA_ARCH__) 29 | Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {} 30 | #endif 31 | 32 | #ifdef __CUDACC__ 33 | // Helpers to index and read/write a single element 34 | __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; } 35 | __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); } 36 | __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; } 37 | #ifdef BFLOAT16 38 | __device__ inline float fetch(unsigned int idx) const { return fp16 ? 
__bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; } 39 | __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; } 40 | __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; } 41 | #else 42 | __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; } 43 | __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; } 44 | __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; } 45 | #endif 46 | 47 | ////////////////////////////////////////////////////////////////////////////////////////// 48 | // Fetch, use broadcasting for tensor dimensions of size 1 49 | __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const 50 | { 51 | return fetch(nhwcIndex(z, y, x, 0)); 52 | } 53 | 54 | __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const 55 | { 56 | return vec3f( 57 | fetch(nhwcIndex(z, y, x, 0)), 58 | fetch(nhwcIndex(z, y, x, 1)), 59 | fetch(nhwcIndex(z, y, x, 2)) 60 | ); 61 | } 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 64 | // Store, no broadcasting here. 
Assume we output full res gradient and then reduce using torch.sum outside 65 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val) 66 | { 67 | store(_nhwcIndex(z, y, x, 0), _val); 68 | } 69 | 70 | __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 71 | { 72 | store(_nhwcIndex(z, y, x, 0), _val.x); 73 | store(_nhwcIndex(z, y, x, 1), _val.y); 74 | store(_nhwcIndex(z, y, x, 2), _val.z); 75 | } 76 | 77 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////// 78 | // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside 79 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val) 80 | { 81 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val); 82 | } 83 | 84 | __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val) 85 | { 86 | store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x); 87 | store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y); 88 | store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z); 89 | } 90 | #endif 91 | 92 | }; 93 | -------------------------------------------------------------------------------- /render/renderutils/c_src/vec3f.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 5 | * property and proprietary rights in and to this material, related 6 | * documentation and any modifications thereto. Any use, reproduction, 7 | * disclosure or distribution of this material and related documentation 8 | * without an express license agreement from NVIDIA CORPORATION or 9 | * its affiliates is strictly prohibited. 
 */

#pragma once

// Three-component float vector. The constructors and compound operators are
// only compiled under nvcc (__CUDACC__); elsewhere the struct is three floats.
struct vec3f
{
    float x, y, z;

#ifdef __CUDACC__
    __device__ vec3f() { }                                                        // leaves x, y, z uninitialized
    __device__ vec3f(float v) { x = v; y = v; z = v; }                            // splat; also lets a float convert implicitly to vec3f
    __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; }
    __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; }

    // Component-wise compound assignment.
    __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; }
    __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; }
    __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; }
    __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; }
#endif
};

#ifdef __CUDACC__
// Component-wise binary operators and unary negation.
__device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); }
__device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); }
__device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); }

// Sum of the three components.
__device__ static inline float sum(vec3f a)
{
    return a.x + a.y + a.z;
}

// Cross product a x b.
__device__ static inline vec3f cross(vec3f a, vec3f b)
{
    vec3f out;
    out.x = a.y * b.z - a.z * b.y;
    out.y = a.z * b.x - a.x * b.z;
    out.z = a.x * b.y - a.y * b.x;
    return out;
}

// Backward of cross(): accumulates (+=) into d_a and d_b given d_out.
// The expressions are d_a += b x d_out and d_b += d_out x a.
__device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out)
{
    d_a.x += d_out.z * b.y - d_out.y * b.z;
    d_a.y += d_out.x * b.z - d_out.z * b.x;
    d_a.z += d_out.y * b.x - d_out.x * b.y;

    d_b.x += d_out.y * a.z - d_out.z * a.y;
    d_b.y += d_out.z * a.x - d_out.x * a.z;
    d_b.z += d_out.x * a.y - d_out.y * a.x;
}

// Dot product.
__device__ static inline float dot(vec3f a, vec3f b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

// Backward of dot(): accumulates d_a += d_out * b and d_b += d_out * a.
__device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out)
{
    d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z;
    d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z;
}

// Returns 2 * n * dot(n, x) - x. The float factors convert to vec3f through
// the splat constructor, so the multiplications are component-wise.
__device__ static inline vec3f reflect(vec3f x, vec3f n)
{
    return n * 2.0f * dot(n, x) - x;
}

// Backward of reflect(): accumulates into d_x and d_n.
// d_x uses the Jacobian 2 * n * n^T - I; d_n uses 2 * dot(n, x) * I + 2 * n * x^T,
// both applied to d_out from the left (transposed).
__device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out)
{
    d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z);
    d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z);
    d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1);

    d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x);
    d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y);
    d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z));
}

// Returns v / |v|, or the zero vector when |v| is not greater than 0.
__device__ static inline vec3f safeNormalize(vec3f v)
{
    float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    return l > 0.0f ? (v / l) : vec3f(0.0f);
}

// Backward of safeNormalize(): accumulates into d_v.
// For |v| > 0 this applies (|v|^2 * I - v * v^T) / |v|^3 to d_out; for a
// zero-length input nothing is accumulated, matching the forward's zero branch.
__device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out)
{

    float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    if (l > 0.0f)
    {
        // fac = 1 / |v|^3 (the literal 1.0 is a double; the result is stored as float)
        float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f);
        d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac;
        d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac;
        d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac;
    }
}

#endif
--------------------------------------------------------------------------------
/render/renderutils/c_src/vec4f.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
10 | */ 11 | 12 | #pragma once 13 | 14 | struct vec4f 15 | { 16 | float x, y, z, w; 17 | 18 | #ifdef __CUDACC__ 19 | __device__ vec4f() { } 20 | __device__ vec4f(float v) { x = v; y = v; z = v; w = v; } 21 | __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; } 22 | __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; } 23 | #endif 24 | }; 25 | 26 | -------------------------------------------------------------------------------- /render/renderutils/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 4 | # property and proprietary rights in and to this material, related 5 | # documentation and any modifications thereto. Any use, reproduction, 6 | # disclosure or distribution of this material and related documentation 7 | # without an express license agreement from NVIDIA CORPORATION or 8 | # its affiliates is strictly prohibited. 

import torch

#----------------------------------------------------------------------------
# HDR image losses
#----------------------------------------------------------------------------

def _tonemap_srgb(f, exposure=5):
    """Scale *f* by *exposure* and apply the piecewise sRGB transfer curve.

    Values above 0.0031308 take the power branch; the rest take the linear
    branch. The clamp only keeps ``pow`` away from inputs below the threshold
    in the branch that ``torch.where`` discards.
    """
    f = f * exposure
    return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)

def _SMAPE(img, target, eps=0.01):
    """Return mean(|img - target| / (|img| + |target| + eps)).

    ``eps`` keeps the denominator away from zero. It is now actually used;
    the default reproduces the constant that was previously hard-coded.
    """
    nom = torch.abs(img - target)
    denom = torch.abs(img) + torch.abs(target) + eps
    return torch.mean(nom / denom)

def _RELMSE(img, target, eps=0.1):
    """Return mean((img - target)^2 / (img^2 + target^2 + eps)).

    ``eps`` keeps the denominator away from zero. It is now actually used;
    the default reproduces the constant that was previously hard-coded.
    """
    nom = (img - target) * (img - target)
    denom = img * img + target * target + eps
    return torch.mean(nom / denom)

def image_loss_fn(img, target, loss, tonemapper):
    """Compute an image loss between *img* and *target*.

    Args:
        img: predicted image tensor.
        target: reference image tensor.
        loss: ``'mse'``, ``'smape'`` or ``'relmse'``; any other value
            (including ``'l1'``) selects the L1 loss.
        tonemapper: ``'log_srgb'`` clamps both images to [0, 65535], applies
            ``log(x + 1)`` and then the sRGB tonemapper before the loss; any
            other value leaves the images untouched.

    Returns:
        A scalar tensor holding the loss.
    """
    if tonemapper == 'log_srgb':
        img    = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1))
        target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1))

    if loss == 'mse':
        return torch.nn.functional.mse_loss(img, target)
    elif loss == 'smape':
        return _SMAPE(img, target)
    elif loss == 'relmse':
        return _RELMSE(img, target)
    else:
        # Deliberate catch-all: unrecognised loss names fall back to L1.
        return torch.nn.functional.l1_loss(img, target)

# --------------------------------------------------------------------------------
# Dump file boundary: /render/renderutils/tests/test_loss.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.

import torch

import os
import sys
sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

RES = 8
DTYPE = torch.float32

def tonemap_srgb(f):
    """Apply the piecewise sRGB transfer curve to *f* (no exposure scaling)."""
    return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)

def l1(output, target):
    """L1 loss after clamping to [0, 65535], log(x + 1) and sRGB tonemapping."""
    x = torch.clamp(output, min=0, max=65535)
    r = torch.clamp(target, min=0, max=65535)
    x = tonemap_srgb(torch.log(x + 1))
    r = tonemap_srgb(torch.log(r + 1))
    return torch.nn.functional.l1_loss(x,r)

def relative_loss(name, ref, cuda):
    """Print the maximum element-wise relative error of *cuda* against *ref*.

    The epsilon is added to ``|ref|`` rather than to ``ref`` itself, so the
    denominator stays strictly positive even for negative reference values.
    """
    ref = ref.float()
    cuda = cuda.float()
    print(name, torch.max(torch.abs(ref - cuda) / (torch.abs(ref) + 1e-7)).item())

def test_loss(loss, tonemapper):
    """Compare ``ru.image_loss`` with and without ``use_python=True``.

    Both calls see identical random inputs; the loss value and the gradients
    with respect to image and target are compared and printed.
    """
    img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    img_ref = img_cuda.clone().detach().requires_grad_(True)
    target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    target_ref = target_cuda.clone().detach().requires_grad_(True)

    # Reference result: the use_python=True path.
    ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True)
    ref_loss.backward()

    # Result under test: the default path.
    cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print("    Loss: %s, %s" % (loss, tonemapper))
    print("-------------------------------------------------------------")

    relative_loss("res:", ref_loss, cuda_loss)
    relative_loss("img:", img_ref.grad, img_cuda.grad)
    relative_loss("target:", target_ref.grad, target_cuda.grad)


test_loss('l1', 'none')
test_loss('l1', 'log_srgb')
test_loss('mse', 'log_srgb')
test_loss('smape', 'none')
test_loss('relmse', 'none')
test_loss('mse', 'none')
# --------------------------------------------------------------------------------
# Dump file boundary: /render/renderutils/tests/test_mesh.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.

import torch

import os
import sys
sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

BATCH = 8
RES = 1024
DTYPE = torch.float32

torch.manual_seed(0)

def tonemap_srgb(f):
    """Apply the piecewise sRGB transfer curve to *f* (no exposure scaling)."""
    return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)

def l1(output, target):
    """L1 loss after clamping to [0, 65535], log(x + 1) and sRGB tonemapping."""
    x = torch.clamp(output, min=0, max=65535)
    r = torch.clamp(target, min=0, max=65535)
    x = tonemap_srgb(torch.log(x + 1))
    r = tonemap_srgb(torch.log(r + 1))
    return torch.nn.functional.l1_loss(x,r)

def relative_loss(name, ref, cuda):
    """Print the maximum element-wise relative error of *cuda* against *ref*.

    A small epsilon is added to ``|ref|`` so that reference entries equal to
    zero cannot turn the reported maximum into inf or NaN.
    """
    ref = ref.float()
    cuda = cuda.float()
    print(name, torch.max(torch.abs(ref - cuda) / (torch.abs(ref) + 1e-7)).item())

def test_xfm_points():
    """Compare ``ru.xfm_points`` with and without ``use_python=True``.

    Checks the transformed output and the gradient with respect to the points
    under an MSE loss against a random target.
    """
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    points_ref = points_cuda.clone().detach().requires_grad_(True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref_out, target)
    ref_loss.backward()

    cuda_out = ru.xfm_points(points_cuda, mtx_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda_out, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")

    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)

def test_xfm_vectors():
    """Compare ``ru.xfm_vectors`` with and without ``use_python=True``.

    Also re-runs ``ru.xfm_points`` on separate copies of the same points and
    reports the point gradients of both operations.
    """
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    points_ref = points_cuda.clone().detach().requires_grad_(True)
    points_cuda_p = points_cuda.clone().detach().requires_grad_(True)
    points_ref_p = points_cuda.clone().detach().requires_grad_(True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    # Vector transform: the output is compared against the first three target channels.
    ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3])
    ref_loss.backward()

    cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3])
    cuda_loss.backward()

    # Point transform on independent leaf copies, so gradients do not mix.
    ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True)
    ref_loss_p = torch.nn.MSELoss()(ref_out_p, target)
    ref_loss_p.backward()

    cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda)
    cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target)
    cuda_loss_p.backward()

    print("-------------------------------------------------------------")

    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)
    relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad)

test_xfm_points()
test_xfm_vectors()

# --------------------------------------------------------------------------------
# Dump file boundary: /render/renderutils/tests/test_perf.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.

import torch

import os
import sys
sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

DTYPE=torch.float32

def test_bsdf(BATCH, RES, ITR):
    """Time ``ru.pbr_bsdf`` with and without ``use_python=True``.

    Args:
        BATCH: leading dimension of every input tensor.
        RES: both spatial dimensions of every input tensor.
        ITR: number of forward calls timed per variant.

    Prints the elapsed time reported by CUDA events for each variant.
    The previously allocated but never used ``target`` tensor has been
    dropped, so the benchmark no longer holds an extra full-size GPU buffer.
    """
    kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    kd_ref = kd_cuda.clone().detach().requires_grad_(True)
    arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    arm_ref = arm_cuda.clone().detach().requires_grad_(True)
    pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    pos_ref = pos_cuda.clone().detach().requires_grad_(True)
    nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    nrm_ref = nrm_cuda.clone().detach().requires_grad_(True)
    view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    view_ref = view_cuda.clone().detach().requires_grad_(True)
    light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    light_ref = light_cuda.clone().detach().requires_grad_(True)

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # One un-timed call of the default path before any measurement starts.
    ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)

    print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES))

    start.record()
    for _ in range(ITR):
        ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True)
    end.record()
    # Wait for the recorded events to complete before reading the timer.
    torch.cuda.synchronize()
    print("Pbr BSDF python:", start.elapsed_time(end))

    start.record()
    for _ in range(ITR):
        cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)
    end.record()
    torch.cuda.synchronize()
    print("Pbr BSDF cuda:", start.elapsed_time(end))

test_bsdf(1, 512, 1000)
test_bsdf(16, 512, 1000)
test_bsdf(1, 2048, 1000)

# --------------------------------------------------------------------------------