├── .gitignore
├── GMeshDiffusion
    ├── diffusion_configs
    │   ├── config_lower_occgrid_normalized.py
    │   └── config_upper_occgrid_normalized.py
    ├── lib
    │   ├── dataset
    │   │   ├── gshell_dataset.py
    │   │   └── gshell_dataset_aug.py
    │   └── diffusion
    │   │   ├── evaler.py
    │   │   ├── likelihood.py
    │   │   ├── losses.py
    │   │   ├── models
    │   │       ├── __init__.py
    │   │       ├── ema.py
    │   │       ├── functional.py
    │   │       ├── layers.py
    │   │       ├── normalization.py
    │   │       ├── unet3d_occgrid.py
    │   │       └── utils.py
    │   │   ├── sampling.py
    │   │   ├── sde_lib.py
    │   │   ├── trainer.py
    │   │   ├── trainer_ddp.py
    │   │   └── utils.py
    ├── main_diffusion.py
    ├── main_diffusion_ddp.py
    ├── metadata
    │   ├── get_splits_lower.py
    │   ├── get_splits_upper.py
    │   ├── save_tet_info.py
    │   └── tet_to_cubic_grid_dataset.py
    └── scripts
    │   ├── run_eval_lower_occgrid_normalized.sh
    │   ├── run_eval_upper_occgrid_normalized.sh
    │   ├── run_lower_occgrid_normalized_ddp.sh
    │   └── run_upper_occgrid_normalized_ddp.sh
├── README.md
├── assets
    ├── gshell_logo.png
    └── teaser.png
├── configs
    ├── deepfashion_mc.json
    ├── deepfashion_mc_256.json
    ├── deepfashion_mc_512.json
    ├── deepfashion_mc_80.json
    ├── nerf_chair.json
    ├── polycam_mc.json
    ├── polycam_mc_128.json
    └── polycam_mc_16samples.json
├── data
    └── tets
    │   └── generate_tets.py
├── dataset
    ├── __init__.py
    ├── dataset.py
    ├── dataset_deepfashion.py
    ├── dataset_deepfashion_testset.py
    ├── dataset_llff.py
    ├── dataset_mesh.py
    ├── dataset_nerf.py
    └── dataset_nerf_colmap.py
├── denoiser
    └── denoiser.py
├── eval_gmeshdiffusion_generated_samples.py
├── geometry
    ├── embedding.py
    ├── flexicubes_table.py
    ├── gshell_flexicubes.py
    ├── gshell_flexicubes_geometry.py
    ├── gshell_tets.py
    ├── gshell_tets_geometry.py
    └── mlp.py
├── render
    ├── light.py
    ├── material.py
    ├── mesh.py
    ├── mlptexture.py
    ├── obj.py
    ├── optixutils
    │   ├── __init__.py
    │   ├── c_src
    │   │   ├── accessor.h
    │   │   ├── bsdf.h
    │   │   ├── common.h
    │   │   ├── denoising.cu
    │   │   ├── denoising.h
    │   │   ├── envsampling
    │   │   │   ├── kernel.cu
    │   │   │   └── params.h
    │   │   ├── math_utils.h
    │   │   ├── optix_wrapper.cpp
    │   │   ├── optix_wrapper.h
    │   │   └── torch_bindings.cpp
    │   ├── include
    │   │   ├── internal
    │   │   │   ├── optix_7_device_impl.h
    │   │   │   ├── optix_7_device_impl_exception.h
    │   │   │   └── optix_7_device_impl_transformations.h
    │   │   ├── optix.h
    │   │   ├── optix_7_device.h
    │   │   ├── optix_7_host.h
    │   │   ├── optix_7_types.h
    │   │   ├── optix_denoiser_tiling.h
    │   │   ├── optix_device.h
    │   │   ├── optix_function_table.h
    │   │   ├── optix_function_table_definition.h
    │   │   ├── optix_host.h
    │   │   ├── optix_stack_size.h
    │   │   ├── optix_stubs.h
    │   │   └── optix_types.h
    │   ├── ops.py
    │   └── tests
    │   │   └── filter_test.py
    ├── regularizer.py
    ├── render.py
    ├── renderutils
    │   ├── __init__.py
    │   ├── bsdf.py
    │   ├── c_src
    │   │   ├── bsdf.cu
    │   │   ├── bsdf.h
    │   │   ├── common.cpp
    │   │   ├── common.h
    │   │   ├── cubemap.cu
    │   │   ├── cubemap.h
    │   │   ├── loss.cu
    │   │   ├── loss.h
    │   │   ├── mesh.cu
    │   │   ├── mesh.h
    │   │   ├── normal.cu
    │   │   ├── normal.h
    │   │   ├── tensor.h
    │   │   ├── torch_bindings.cpp
    │   │   ├── vec3f.h
    │   │   └── vec4f.h
    │   ├── loss.py
    │   ├── ops.py
    │   └── tests
    │   │   ├── test_bsdf.py
    │   │   ├── test_loss.py
    │   │   ├── test_mesh.py
    │   │   └── test_perf.py
    ├── texture.py
    └── util.py
├── train_gflexicubes_deepfashion.py
├── train_gflexicubes_polycam.py
├── train_gshelltet_deepfashion.py
├── train_gshelltet_polycam.py
└── train_gshelltet_synthetic.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | # lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | .DS_STORE
163 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/diffusion_configs/config_lower_occgrid_normalized.py:
--------------------------------------------------------------------------------
  1 | import ml_collections
  2 | import torch
  3 | import os
  4 | 
  5 | 
  6 | def get_config():
  7 |     config = ml_collections.ConfigDict()
  8 | 
  9 |     # data
 10 |     data = config.data = ml_collections.ConfigDict()
 11 |     data.root_dir = 'PLACEHOLDER'
 12 |     # data.dataset_metapath = os.path.join(data.root_dir, 'metadata/lower_res64_train.txt')
 13 |     data.num_workers = 4
 14 |     data.grid_size = 128
 15 |     data.tet_resolution = 64
 16 |     data.num_channels = 4
 17 |     data.use_occ_grid = True
 18 |     data.grid_metafile = os.path.join(data.root_dir, 'metadata/lower_res64_grid_train.txt')
 19 |     data.occgrid_metafile = os.path.join(data.root_dir, 'metadata/lower_res64_occgrid_train.txt')
 20 | 
 21 |     data.occ_mask_path = os.path.join(data.root_dir, 'metadata/occ_mask_res64.pt')
 22 |     data.tet_info_path = os.path.join(data.root_dir, 'metadata/tet_info.pt')
 23 | 
 24 |     data.filter_meta_path = None
 25 |     data.aug = True
 26 | 
 27 |     # training
 28 |     training = config.training = ml_collections.ConfigDict()
 29 |     training.sde = 'vpsde'
 30 |     training.continuous = False
 31 |     training.reduce_mean = True
 32 |     training.batch_size = 1 ### for DDP, global_batch_size = nproc * local_batch_size
 33 |     training.num_grad_acc_steps = 4 
 34 |     training.n_iters = 2400001
 35 |     training.snapshot_freq = 1000
 36 |     training.log_freq = 50
 37 |     ## produce samples at each snapshot.
 38 |     training.snapshot_sampling = True
 39 |     training.likelihood_weighting = False
 40 |     training.loss_type = 'l2'
 41 |     training.train_dir = "PLACEHOLDER"
 42 |     training.snapshot_freq_for_preemption = 1000
 43 |     training.gradscaler_growth_interval = 1000
 44 |     training.use_aux_loss = False
 45 | 
 46 | 
 47 |     training.compile = True # PyTorch 2.0, torch.compile
 48 |     training.enable_xformers_memory_efficient_attention = True
 49 | 
 50 |     # sampling
 51 |     sampling = config.sampling = ml_collections.ConfigDict()
 52 |     sampling.method = 'pc'
 53 |     sampling.predictor = 'ancestral_sampling'
 54 |     sampling.corrector = 'none'
 55 |     sampling.n_steps_each = 1
 56 |     sampling.noise_removal = True
 57 |     sampling.probability_flow = False
 58 |     sampling.snr = 0.075
 59 | 
 60 | 
 61 |     # model
 62 |     model = config.model = ml_collections.ConfigDict()
 63 |     model.name = 'unet3d_occgrid'
 64 |     model.use_occ_grid = True
 65 |     model.num_res_blocks = 2
 66 |     model.num_res_blocks_1st_layer = 2
 67 |     model.base_channels = 128
 68 |     model.ch_mult = (1, 2, 2, 4, 4, 4)
 69 |     model.down_block_types = (
 70 |         "ResBlock", "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock"
 71 |     )
 72 |     model.up_block_types = (
 73 |        "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock", "ResBlock"
 74 |     )
 75 |     model.scale_by_sigma = False
 76 |     model.num_scales = 1000
 77 |     model.ema_rate = 0.9999
 78 |     model.normalization = 'GroupNorm'
 79 |     model.act_fn = 'swish'
 80 |     model.attn_resolutions = (16,)
 81 |     model.resamp_with_conv = True
 82 |     model.dropout = 0.1
 83 |     model.sigma_max = 378
 84 |     model.sigma_min = 0.01
 85 |     model.beta_min = 0.1
 86 |     model.beta_max = 20.
 87 |     model.embedding_type = 'fourier'
 88 |     model.pred_type = 'noise'
 89 |     model.conditional = True
 90 | 
 91 |     model.feature_mask_path = os.path.join(data.root_dir, 'metadata/global_mask_res64.pt')
 92 |     model.pixcat_mask_path = os.path.join(data.root_dir, 'metadata/cat_mask_res64.pt')
 93 | 
 94 |     # optimization
 95 |     config.optim = optim = ml_collections.ConfigDict()
 96 |     optim.weight_decay = 1e-5
 97 |     optim.optimizer = 'AdamW'
 98 |     optim.lr = 1e-5
 99 |     optim.beta1 = 0.9
100 |     optim.eps = 1e-8
101 |     optim.warmup = 5000
102 |     optim.grad_clip = 1.
103 | 
104 |     # eval
105 |     config.eval = eval_config = ml_collections.ConfigDict()
106 |     eval_config.batch_size = 2
107 |     eval_config.idx = 0
108 |     eval_config.bin_size = 30
109 |     eval_config.eval_dir = "PLACEHOLDER"
110 |     eval_config.ckpt_path = "PLACEHOLDER"
111 |     
112 | 
113 |     config.seed = 42
114 |     config.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
115 | 
116 | 
117 |     return config
118 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/diffusion_configs/config_upper_occgrid_normalized.py:
--------------------------------------------------------------------------------
  1 | import ml_collections
  2 | import torch
  3 | import os
  4 | 
  5 | 
  6 | def get_config():
  7 |     config = ml_collections.ConfigDict()
  8 | 
  9 |     # data
 10 |     data = config.data = ml_collections.ConfigDict()
 11 |     data.root_dir = 'PLACEHOLDER'
 12 |     # data.dataset_metapath = os.path.join(data.root_dir, 'metadata/upper_res64_train.txt')
 13 |     data.num_workers = 4
 14 |     data.grid_size = 128
 15 |     data.tet_resolution = 64
 16 |     data.num_channels = 4
 17 |     data.use_occ_grid = True
 18 |     data.grid_metafile = os.path.join(data.root_dir, 'metadata/upper_res64_grid_train.txt')
 19 |     data.occgrid_metafile = os.path.join(data.root_dir, 'metadata/upper_res64_occgrid_train.txt')
 20 | 
 21 |     data.occ_mask_path = os.path.join(data.root_dir, 'metadata/occ_mask_res64.pt')
 22 |     data.tet_info_path = os.path.join(data.root_dir, 'metadata/tet_info.pt')
 23 | 
 24 |     data.filter_meta_path = None
 25 |     data.aug = True
 26 | 
 27 |     # training
 28 |     training = config.training = ml_collections.ConfigDict()
 29 |     training.sde = 'vpsde'
 30 |     training.continuous = False
 31 |     training.reduce_mean = True
 32 |     training.batch_size = 1 ### for DDP, global_batch_size = nproc * local_batch_size
 33 |     training.num_grad_acc_steps = 4 
 34 |     training.n_iters = 2400001
 35 |     training.snapshot_freq = 1000
 36 |     training.log_freq = 50
 37 |     ## produce samples at each snapshot.
 38 |     training.snapshot_sampling = True
 39 |     training.likelihood_weighting = False
 40 |     training.loss_type = 'l2'
 41 |     training.train_dir = "PLACEHOLDER"
 42 |     training.snapshot_freq_for_preemption = 1000
 43 |     training.gradscaler_growth_interval = 1000
 44 |     training.use_aux_loss = False
 45 | 
 46 | 
 47 |     training.compile = True # PyTorch 2.0, torch.compile
 48 |     training.enable_xformers_memory_efficient_attention = True
 49 | 
 50 |     # sampling
 51 |     sampling = config.sampling = ml_collections.ConfigDict()
 52 |     sampling.method = 'pc'
 53 |     sampling.predictor = 'ancestral_sampling'
 54 |     sampling.corrector = 'none'
 55 |     sampling.n_steps_each = 1
 56 |     sampling.noise_removal = True
 57 |     sampling.probability_flow = False
 58 |     sampling.snr = 0.075
 59 | 
 60 | 
 61 |     # model
 62 |     model = config.model = ml_collections.ConfigDict()
 63 |     model.name = 'unet3d_occgrid'
 64 |     model.use_occ_grid = True
 65 |     model.num_res_blocks = 2
 66 |     model.num_res_blocks_1st_layer = 2
 67 |     model.base_channels = 128
 68 |     model.ch_mult = (1, 2, 2, 4, 4, 4)
 69 |     model.down_block_types = (
 70 |         "ResBlock", "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock"
 71 |     )
 72 |     model.up_block_types = (
 73 |        "ResBlock", "ResBlock", "AttnResBlock", "ResBlock", "ResBlock", "ResBlock"
 74 |     )
 75 |     model.scale_by_sigma = False
 76 |     model.num_scales = 1000
 77 |     model.ema_rate = 0.9999
 78 |     model.normalization = 'GroupNorm'
 79 |     model.act_fn = 'swish'
 80 |     model.attn_resolutions = (16,)
 81 |     model.resamp_with_conv = True
 82 |     model.dropout = 0.1
 83 |     model.sigma_max = 378
 84 |     model.sigma_min = 0.01
 85 |     model.beta_min = 0.1
 86 |     model.beta_max = 20.
 87 |     model.embedding_type = 'fourier'
 88 |     model.pred_type = 'noise'
 89 |     model.conditional = True
 90 | 
 91 |     model.feature_mask_path = os.path.join(data.root_dir, 'metadata/global_mask_res64_occaug_normalized_v1.pt')
 92 |     model.pixcat_mask_path = os.path.join(data.root_dir, 'metadata/cat_mask_res64_occaug_normalized_v1.pt')
 93 | 
 94 |     # optimization
 95 |     config.optim = optim = ml_collections.ConfigDict()
 96 |     optim.weight_decay = 1e-5
 97 |     optim.optimizer = 'AdamW'
 98 |     optim.lr = 1e-5
 99 |     optim.beta1 = 0.9
100 |     optim.eps = 1e-8
101 |     optim.warmup = 5000
102 |     optim.grad_clip = 1.
103 | 
104 |     # eval
105 |     config.eval = eval_config = ml_collections.ConfigDict()
106 |     eval_config.batch_size = 2
107 |     eval_config.idx = 0
108 |     eval_config.bin_size = 30
109 |     eval_config.eval_dir = "PLACEHOLDER"
110 |     eval_config.ckpt_path = "PLACEHOLDER"
111 |     
112 | 
113 |     config.seed = 42
114 |     config.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
115 | 
116 | 
117 |     return config
118 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/dataset/gshell_dataset.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from torch.utils.data import Dataset
 4 | 
 5 | class GShellDataset(Dataset):
 6 |     def __init__(self, filepath_metafile, extension='pt'):
 7 |         super().__init__()
 8 |         with open(filepath_metafile, 'r') as f:
 9 |             self.filepath_list = [fpath.rstrip() for fpath in f]
10 | 
11 |         self.extension = extension
12 |         assert self.extension in ['pt', 'npy']
13 |     
14 |     def __len__(self):
15 |         return len(self.filepath_list)
16 | 
17 |     def __getitem__(self, idx):
18 |         with torch.no_grad():
19 |             if self.extension == 'pt':
20 |                 datum = torch.load(self.filepath_list[idx], map_location='cpu')
21 |             else:
22 |                 datum = torch.tensor(np.load(self.filepath_list[idx]))
23 |         return datum
24 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/dataset/gshell_dataset_aug.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.utils.data import Dataset
 3 | 
 4 | class GShellAugDataset(Dataset):
 5 |     def __init__(self, FLAGS, extension='pt'):
 6 |         super().__init__()
 7 |         with open(FLAGS.data.grid_metafile, 'r') as f:
 8 |             self.filepath_list = [fpath.rstrip() for fpath in f]
 9 |         with open(FLAGS.data.occgrid_metafile, 'r') as f:
10 |             self.occ_filepath_list = [fpath.rstrip() for fpath in f]
11 | 
12 |         self.extension = extension
13 |         self.num_channels = FLAGS.data.num_channels
14 |         print('num_channels: ', self.num_channels)
15 |         assert self.extension in ['pt', 'npy']
16 |     
17 |     def __len__(self):
18 |         return len(self.filepath_list)
19 | 
20 |     def __getitem__(self, idx):
21 |         with torch.no_grad():
22 |             grid = torch.load(self.filepath_list[idx], map_location='cpu')
23 |             try:
24 |                 occ_grid = torch.load(self.occ_filepath_list[idx], map_location='cpu')
25 |             except:
26 |                 print(self.occ_filepath_list[idx])
27 |                 raise
28 |         return (grid[:self.num_channels], occ_grid)
29 |     
30 |     @staticmethod
31 |     def collate(data):
32 |         return {
33 |             'grid': torch.stack([x[0] for x in data]),
34 |             'occgrid': torch.stack([x[1] for x in data]),
35 |         }
36 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/likelihood.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Copyright 2020 The Google Research Authors.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # pylint: skip-file
 17 | # pytype: skip-file
 18 | """Various sampling methods."""
 19 | 
 20 | import torch
 21 | import numpy as np
 22 | from scipy import integrate
 23 | from .models import utils as mutils
 24 | 
 25 | 
 26 | def get_div_fn(fn):
 27 |   """Create the divergence function of `fn` using the Hutchinson-Skilling trace estimator."""
 28 | 
 29 |   def div_fn(x, t, eps):
 30 |     with torch.enable_grad():
 31 |       x.requires_grad_(True)
 32 |       fn_eps = torch.sum(fn(x, t) * eps)
 33 |       grad_fn_eps = torch.autograd.grad(fn_eps, x)[0]
 34 |     x.requires_grad_(False)
 35 |     return torch.sum(grad_fn_eps * eps, dim=tuple(range(1, len(x.shape))))
 36 | 
 37 |   return div_fn
 38 | 
 39 | 
 40 | def get_likelihood_fn(sde, inverse_scaler, hutchinson_type='Rademacher',
 41 |                       rtol=1e-5, atol=1e-5, method='RK45', eps=1e-5):
 42 |   """Create a function to compute the unbiased log-likelihood estimate of a given data point.
 43 | 
 44 |   Args:
 45 |     sde: A `sde_lib.SDE` object that represents the forward SDE.
 46 |     inverse_scaler: The inverse data normalizer.
 47 |     hutchinson_type: "Rademacher" or "Gaussian". The type of noise for Hutchinson-Skilling trace estimator.
 48 |     rtol: A `float` number. The relative tolerance level of the black-box ODE solver.
 49 |     atol: A `float` number. The absolute tolerance level of the black-box ODE solver.
 50 |     method: A `str`. The algorithm for the black-box ODE solver.
 51 |       See documentation for `scipy.integrate.solve_ivp`.
 52 |     eps: A `float` number. The probability flow ODE is integrated to `eps` for numerical stability.
 53 | 
 54 |   Returns:
 55 |     A function that a batch of data points and returns the log-likelihoods in bits/dim,
 56 |       the latent code, and the number of function evaluations cost by computation.
 57 |   """
 58 | 
 59 |   def drift_fn(model, x, t):
 60 |     """The drift function of the reverse-time SDE."""
 61 |     score_fn = mutils.get_score_fn(sde, model, train=False, continuous=True)
 62 |     # Probability flow ODE is a special case of Reverse SDE
 63 |     rsde = sde.reverse(score_fn, probability_flow=True)
 64 |     return rsde.sde(x, t)[0]
 65 | 
 66 |   def div_fn(model, x, t, noise):
 67 |     return get_div_fn(lambda xx, tt: drift_fn(model, xx, tt))(x, t, noise)
 68 | 
 69 |   def likelihood_fn(model, data):
 70 |     """Compute an unbiased estimate to the log-likelihood in bits/dim.
 71 | 
 72 |     Args:
 73 |       model: A score model.
 74 |       data: A PyTorch tensor.
 75 | 
 76 |     Returns:
 77 |       bpd: A PyTorch tensor of shape [batch size]. The log-likelihoods on `data` in bits/dim.
 78 |       z: A PyTorch tensor of the same shape as `data`. The latent representation of `data` under the
 79 |         probability flow ODE.
 80 |       nfe: An integer. The number of function evaluations used for running the black-box ODE solver.
 81 |     """
 82 |     with torch.no_grad():
 83 |       shape = data.shape
 84 |       if hutchinson_type == 'Gaussian':
 85 |         epsilon = torch.randn_like(data)
 86 |       elif hutchinson_type == 'Rademacher':
 87 |         epsilon = torch.randint_like(data, low=0, high=2).float() * 2 - 1.
 88 |       else:
 89 |         raise NotImplementedError(f"Hutchinson type {hutchinson_type} unknown.")
 90 | 
 91 |       def ode_func(t, x):
 92 |         sample = mutils.from_flattened_numpy(x[:-shape[0]], shape).to(data.device).type(torch.float32)
 93 |         vec_t = torch.ones(sample.shape[0], device=sample.device) * t
 94 |         drift = mutils.to_flattened_numpy(drift_fn(model, sample, vec_t))
 95 |         logp_grad = mutils.to_flattened_numpy(div_fn(model, sample, vec_t, epsilon))
 96 |         return np.concatenate([drift, logp_grad], axis=0)
 97 | 
 98 |       init = np.concatenate([mutils.to_flattened_numpy(data), np.zeros((shape[0],))], axis=0)
 99 |       solution = integrate.solve_ivp(ode_func, (eps, sde.T), init, rtol=rtol, atol=atol, method=method)
100 |       nfe = solution.nfev
101 |       zp = solution.y[:, -1]
102 |       z = mutils.from_flattened_numpy(zp[:-shape[0]], shape).to(data.device).type(torch.float32)
103 |       delta_logp = mutils.from_flattened_numpy(zp[-shape[0]:], (shape[0],)).to(data.device).type(torch.float32)
104 |       prior_logp = sde.prior_logp(z)
105 |       bpd = -(prior_logp + delta_logp) / np.log(2)
106 |       N = np.prod(shape[1:])
107 |       bpd = bpd / N
108 |       # A hack to convert log-likelihoods to bits/dim
109 |       offset = 7. - inverse_scaler(-1.)
110 |       bpd = bpd + offset
111 |       return bpd, z, nfe
112 | 
113 |   return likelihood_fn
114 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2020 The Google Research Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/models/ema.py:
--------------------------------------------------------------------------------
  1 | # Modified from https://raw.githubusercontent.com/fadel/pytorch_ema/master/torch_ema/ema.py
  2 | 
  3 | from __future__ import division
  4 | from __future__ import unicode_literals
  5 | 
  6 | import torch
  7 | 
  8 | 
  9 | # Partially based on: https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/training/moving_averages.py
 10 | class ExponentialMovingAverage:
 11 |   """
 12 |   Maintains (exponential) moving average of a set of parameters.
 13 |   """
 14 | 
 15 |   def __init__(self, parameters, decay, use_num_updates=True):
 16 |     """
 17 |     Args:
 18 |       parameters: Iterable of `torch.nn.Parameter`; usually the result of
 19 |         `model.parameters()`.
 20 |       decay: The exponential decay.
 21 |       use_num_updates: Whether to use number of updates when computing
 22 |         averages.
 23 |     """
 24 |     if decay < 0.0 or decay > 1.0:
 25 |       raise ValueError('Decay must be between 0 and 1')
 26 |     self.decay = decay
 27 |     self.num_updates = 0 if use_num_updates else None
 28 |     self.shadow_params = [p.clone().detach()
 29 |                           for p in parameters if p.requires_grad]
 30 |     self.collected_params = []
 31 | 
 32 |   def update(self, parameters):
 33 |     """
 34 |     Update currently maintained parameters.
 35 | 
 36 |     Call this every time the parameters are updated, such as the result of
 37 |     the `optimizer.step()` call.
 38 | 
 39 |     Args:
 40 |       parameters: Iterable of `torch.nn.Parameter`; usually the same set of
 41 |         parameters used to initialize this object.
 42 |     """
 43 |     decay = self.decay
 44 |     if self.num_updates is not None:
 45 |       self.num_updates += 1
 46 |       decay = min(decay, (1 + self.num_updates) / (10 + self.num_updates))
 47 |     one_minus_decay = 1.0 - decay
 48 |     with torch.no_grad():
 49 |       parameters = [p for p in parameters if p.requires_grad]
 50 |       for s_param, param in zip(self.shadow_params, parameters):
 51 |         # print(s_param.device, s_param.device, param.device)
 52 |         s_param.sub_(one_minus_decay * (s_param - param))
 53 | 
 54 |   def copy_to(self, parameters):
 55 |     """
 56 |     Copy current parameters into given collection of parameters.
 57 | 
 58 |     Args:
 59 |       parameters: Iterable of `torch.nn.Parameter`; the parameters to be
 60 |         updated with the stored moving averages.
 61 |     """
 62 |     parameters = [p for p in parameters if p.requires_grad]
 63 |     for s_param, param in zip(self.shadow_params, parameters):
 64 |       if param.requires_grad:
 65 |         param.data.copy_(s_param.data)
 66 | 
 67 |   def store(self, parameters):
 68 |     """
 69 |     Save the current parameters for restoring later.
 70 | 
 71 |     Args:
 72 |       parameters: Iterable of `torch.nn.Parameter`; the parameters to be
 73 |         temporarily stored.
 74 |     """
 75 |     self.collected_params = [param.clone() for param in parameters]
 76 | 
 77 |   def restore(self, parameters):
 78 |     """
 79 |     Restore the parameters stored with the `store` method.
 80 |     Useful to validate the model with EMA parameters without affecting the
 81 |     original optimization process. Store the parameters before the
 82 |     `copy_to` method. After validation (or model saving), use this to
 83 |     restore the former parameters.
 84 | 
 85 |     Args:
 86 |       parameters: Iterable of `torch.nn.Parameter`; the parameters to be
 87 |         updated with the stored parameters.
 88 |     """
 89 |     for c_param, param in zip(self.collected_params, parameters):
 90 |       param.data.copy_(c_param.data)
 91 | 
 92 |   def state_dict(self):
 93 |     return dict(decay=self.decay, num_updates=self.num_updates,
 94 |                 shadow_params=self.shadow_params)
 95 | 
 96 |   def load_state_dict(self, state_dict, device='cuda'):
 97 |     self.decay = state_dict['decay']
 98 |     self.num_updates = state_dict['num_updates']
 99 |     self.shadow_params = state_dict['shadow_params']
100 |     for k, _ in enumerate(self.shadow_params):
101 |       self.shadow_params[k] = self.shadow_params[k].to(device)
102 |     # for k in self.shadow_params:
103 |     #   print(k.device)
104 |     # raise


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/trainer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | 
  5 | import logging
  6 | # Keep the import below for registering all model definitions
  7 | from .models import unet3d, unet3d_occgrid, unet3d_tet_aware, unet3d_occgrid_v2, unet3d_meshdiffusion
  8 | 
  9 | from . import losses
 10 | from .models import utils as mutils
 11 | from .models.ema import ExponentialMovingAverage
 12 | from . import sde_lib
 13 | import torch
 14 | from torch.utils import tensorboard
 15 | from .utils import save_checkpoint, restore_checkpoint
 16 | from ..dataset.gshell_dataset import GShellDataset
 17 | from ..dataset.gshell_dataset_aug import GShellAugDataset
 18 | 
 19 | 
 20 | def train(config):
 21 |     """Runs the training pipeline.
 22 | 
 23 |     Args:
 24 |     config: Configuration to use.
 25 |     workdir: Working directory for checkpoints and TF summaries. If this
 26 |         contains checkpoint training will be resumed from the latest checkpoint.
 27 |     """
 28 | 
 29 |     workdir = config.training.train_dir
 30 |     # Create directories for experimental logs
 31 |     logging.info("working dir: {:s}".format(workdir))
 32 | 
 33 | 
 34 |     tb_dir = os.path.join(workdir, "tensorboard")
 35 |     writer = tensorboard.SummaryWriter(tb_dir)
 36 | 
 37 |     # Initialize model.
 38 |     score_model = mutils.create_model(config)
 39 |     ema = ExponentialMovingAverage(score_model.parameters(), decay=config.model.ema_rate)
 40 |     optimizer = losses.get_optimizer(config, score_model.parameters())
 41 |     gradscaler = torch.cuda.amp.GradScaler(enabled=True)
 42 | 
 43 |     state = dict(optimizer=optimizer, model=score_model, ema=ema, gradscaler=gradscaler, step=0)
 44 | 
 45 | 
 46 |     # Create checkpoints directory
 47 |     checkpoint_dir = os.path.join(workdir, "checkpoints")
 48 |     # Intermediate checkpoints to resume training after pre-emption in cloud environments
 49 |     checkpoint_meta_dir = os.path.join(workdir, "checkpoints-meta", "checkpoint.pth")
 50 |     os.makedirs(checkpoint_dir, exist_ok=True)
 51 |     os.makedirs(os.path.dirname(checkpoint_meta_dir), exist_ok=True)
 52 | 
 53 |     # Resume training when intermediate checkpoints are detected
 54 |     state = restore_checkpoint(checkpoint_meta_dir, state, config.device)
 55 |     initial_step = int(state['step'])
 56 | 
 57 |     print(f"work dir: {workdir}")
 58 | 
 59 |     
 60 |     try:
 61 |         use_occ_grid = config.data.use_occ_grid
 62 |     except:
 63 |         use_occ_grid = False
 64 |     if use_occ_grid:
 65 |         train_dataset = GShellAugDataset(config)
 66 |     else:
 67 |         train_dataset = GShellDataset(config.data.dataset_metapath)
 68 | 
 69 | 
 70 |     try:
 71 |         collate_fn = train_dataset.collate
 72 |     except:
 73 |         collate_fn = None
 74 | 
 75 |     train_loader = torch.utils.data.DataLoader(
 76 |         train_dataset, 
 77 |         batch_size=config.training.batch_size, 
 78 |         shuffle=True,
 79 |         num_workers=config.data.num_workers,
 80 |         collate_fn=collate_fn,
 81 |         pin_memory=True
 82 |     )
 83 | 
 84 |     data_iter = iter(train_loader)
 85 | 
 86 |     print("data loader set")
 87 | 
 88 |     # Setup SDEs
 89 |     sde = sde_lib.VPSDE(beta_min=config.model.beta_min, beta_max=config.model.beta_max, N=config.model.num_scales)
 90 | 
 91 |     # Build one-step training and evaluation functions
 92 |     optimize_fn = losses.optimization_manager(config)
 93 |     try:
 94 |         use_vis_mask = config.model.use_vis_mask
 95 |     except:
 96 |         use_vis_mask = False
 97 |     print('use_vis_mask', use_vis_mask)
 98 |     train_step_fn = losses.get_step_fn(sde, train=True, optimize_fn=optimize_fn,
 99 |                                         loss_type=config.training.loss_type,
100 |                                         pred_type=config.model.pred_type,
101 |                                         use_vis_mask=use_vis_mask,
102 |                                         use_occ=use_occ_grid,
103 |                                         use_aux=config.training.use_aux_loss)
104 | 
105 |     num_train_steps = config.training.n_iters
106 | 
107 |     # In case there are multiple hosts (e.g., TPU pods), only log to host 0
108 |     logging.info("Starting training loop at step %d." % (initial_step // config.training.num_grad_acc_steps,))
109 | 
110 | 
111 |     iter_size = config.training.num_grad_acc_steps
112 |     for step in range(initial_step // iter_size, num_train_steps + 1):
113 |         tmp_loss_dict = {
114 |             'loss_total': 0.0,
115 |             'loss_score': 0.0,
116 |             'loss_reg': 0.0,
117 |         }
118 |         for step_inner in range(iter_size):
119 |             try:
120 |                 # batch, batch_mask = next(data_iter)
121 |                 batch = next(data_iter)
122 |             except StopIteration:
123 |                 # StopIteration is thrown if dataset ends
124 |                 # reinitialize data loader 
125 |                 data_iter = iter(train_loader)
126 |                 batch = next(data_iter)
127 | 
128 |             
129 |             if type(batch) == dict:
130 |                 for k in batch:
131 |                     batch[k] = batch[k].to('cuda', non_blocking=False)
132 |             else:
133 |                 batch = batch.to('cuda', non_blocking=False)
134 | 
135 |             # Execute one training step
136 |             clear_grad_flag = (step_inner == 0)
137 |             update_param_flag = (step_inner == iter_size - 1)
138 |             loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler)
139 |             for key in loss_dict:
140 |                 tmp_loss_dict[key] += loss_dict[key].item() / iter_size
141 | 
142 |             # print(torch.cuda.memory_summary())
143 | 
144 |         if step % config.training.log_freq == 0:
145 |             # logging.info("step: %d, training_loss: %.5e" % (step, tmp_loss))
146 |             logging.info(
147 |                 "step: %d, loss_total: %.5e, loss_score: %.5e, loss_reg: %.5e" 
148 |                 % (step, tmp_loss_dict['loss_total'], tmp_loss_dict['loss_score'], tmp_loss_dict['loss_reg'])
149 |             )
150 |             sys.stdout.flush()
151 |             writer.add_scalar("loss_total", tmp_loss_dict['loss_total'], step)
152 |             writer.add_scalar("loss_score", tmp_loss_dict['loss_score'], step)
153 |             writer.add_scalar("loss_reg", tmp_loss_dict['loss_reg'], step)
154 | 
155 |         # Save a temporary checkpoint to resume training after pre-emption periodically
156 |         if step != 0 and step % config.training.snapshot_freq_for_preemption == 0:
157 |             logging.info(f"save meta at iter {step}")
158 |             save_checkpoint(checkpoint_meta_dir, state)
159 | 
160 |         # Save a checkpoint periodically and generate samples if needed
161 |         if step != 0 and step % config.training.snapshot_freq == 0 or step == num_train_steps:
162 |             logging.info(f"save model: {step}-th")
163 |             save_checkpoint(os.path.join(checkpoint_dir, f'checkpoint_{step}.pth'), state)
164 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/trainer_ddp.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | 
  5 | import logging
  6 | # Keep the import below for registering all model definitions
  7 | from .models import unet3d, unet3d_occgrid, unet3d_tet_aware, unet3d_occgrid_v2, unet3d_meshdiffusion
  8 | 
  9 | from . import losses
 10 | from .models import utils as mutils
 11 | from .models.ema import ExponentialMovingAverage
 12 | from . import sde_lib
 13 | import torch
 14 | from torch.utils import tensorboard
 15 | from .utils import save_checkpoint, restore_checkpoint
 16 | from ..dataset.gshell_dataset import GShellDataset
 17 | from ..dataset.gshell_dataset_aug import GShellAugDataset
 18 | 
 19 | from .lion.lion import Lion
 20 | import torch.distributed as dist
 21 | 
 22 | def train(config):
 23 |     """Runs the training pipeline.
 24 | 
 25 |     Args:
 26 |     config: Configuration to use.
 27 |     workdir: Working directory for checkpoints and TF summaries. If this
 28 |         contains checkpoint training will be resumed from the latest checkpoint.
 29 |     """
 30 |     dist.init_process_group("nccl")
 31 |     rank = dist.get_rank()
 32 |     torch.cuda.set_device(rank)
 33 |     device = torch.device("cuda", rank)
 34 |     print(f"Start running basic DDP example on rank {rank}.")
 35 | 
 36 |     # create model and move it to GPU with id rank
 37 |     world_size = torch.cuda.device_count()
 38 |     device_id = rank % torch.cuda.device_count()
 39 | 
 40 |     workdir = config.training.train_dir
 41 |     # Create directories for experimental logs
 42 |     logging.info("working dir: {:s}".format(workdir))
 43 | 
 44 | 
 45 |     tb_dir = os.path.join(workdir, "tensorboard")
 46 |     writer = tensorboard.SummaryWriter(tb_dir)
 47 | 
 48 |     # Initialize model.
 49 |     score_model = mutils.create_model(config, ddp=True, rank=rank)
 50 |     ema = ExponentialMovingAverage(score_model.parameters(), decay=config.model.ema_rate)
 51 |     optimizer = losses.get_optimizer(config, score_model.parameters())
 52 |     gradscaler = torch.cuda.amp.GradScaler(growth_interval=config.training.gradscaler_growth_interval)
 53 | 
 54 |     state = dict(optimizer=optimizer, model=score_model, ema=ema, gradscaler=gradscaler, step=0)
 55 | 
 56 | 
 57 |     # Create checkpoints directory
 58 |     checkpoint_dir = os.path.join(workdir, "checkpoints")
 59 |     # Intermediate checkpoints to resume training after pre-emption in cloud environments
 60 |     checkpoint_meta_dir = os.path.join(workdir, "checkpoints-meta", "checkpoint.pth")
 61 |     os.makedirs(checkpoint_dir, exist_ok=True)
 62 |     os.makedirs(os.path.dirname(checkpoint_meta_dir), exist_ok=True)
 63 | 
 64 |     # Resume training when intermediate checkpoints are detected
 65 |     state = restore_checkpoint(checkpoint_meta_dir, state, config.device, rank=rank)
 66 |     initial_step = int(state['step'])
 67 | 
 68 |     print(f"work dir: {workdir}")
 69 | 
 70 |     try:
 71 |         use_occ_grid = config.data.use_occ_grid
 72 |     except:
 73 |         use_occ_grid = False
 74 |     if use_occ_grid:
 75 |         train_dataset = GShellAugDataset(config)
 76 |     else:
 77 |         train_dataset = GShellDataset(config.data.dataset_metapath)
 78 | 
 79 |     train_sampler = torch.utils.data.distributed.DistributedSampler(
 80 |     	train_dataset,
 81 |     	num_replicas=world_size,
 82 |     	rank=rank
 83 |     )
 84 | 
 85 |     try:
 86 |         collate_fn = train_dataset.collate
 87 |     except:
 88 |         collate_fn = None
 89 |     train_loader = torch.utils.data.DataLoader(
 90 |         train_dataset, 
 91 |         batch_size=config.training.batch_size, 
 92 |         num_workers=config.data.num_workers,
 93 |         # pin_memory=True,
 94 |         sampler=train_sampler,
 95 |         collate_fn=collate_fn
 96 |     )
 97 | 
 98 |     data_iter = iter(train_loader)
 99 | 
100 |     print("data loader set")
101 | 
102 |     # Setup SDEs
103 |     sde = sde_lib.VPSDE(beta_min=config.model.beta_min, beta_max=config.model.beta_max, N=config.model.num_scales)
104 | 
105 |     # Build one-step training and evaluation functions
106 |     optimize_fn = losses.optimization_manager(config)
107 |     try:
108 |         use_vis_mask = config.model.use_vis_mask
109 |     except:
110 |         use_vis_mask = False
111 |     print('use_vis_mask', use_vis_mask)
112 |     train_step_fn = losses.get_step_fn(sde, train=True, optimize_fn=optimize_fn,
113 |                                         loss_type=config.training.loss_type,
114 |                                         pred_type=config.model.pred_type,
115 |                                         use_vis_mask=use_vis_mask,
116 |                                         use_occ=use_occ_grid,
117 |                                         use_aux=config.training.use_aux_loss)
118 | 
119 |     num_train_steps = config.training.n_iters
120 | 
121 |     # In case there are multiple hosts (e.g., TPU pods), only log to host 0
122 |     logging.info("Starting training loop at step %d." % (initial_step // config.training.num_grad_acc_steps,))
123 | 
124 |     iter_size = config.training.num_grad_acc_steps
125 |     epoch = 0
126 |     train_sampler.set_epoch(epoch)
127 |     for step in range(initial_step // iter_size, num_train_steps + 1):
128 |         tmp_loss_dict = {
129 |             'loss_total': 0.0,
130 |             'loss_score': 0.0,
131 |             'loss_reg': 0.0,
132 |         }
133 |         for step_inner in range(iter_size):
134 |             try:
135 |                 # batch, batch_mask = next(data_iter)
136 |                 batch = next(data_iter)
137 |             except StopIteration:
138 |                 # StopIteration is thrown if dataset ends
139 |                 # reinitialize data loader 
140 |                 epoch += 1
141 |                 train_sampler.set_epoch(epoch)
142 |                 data_iter = iter(train_loader)
143 |                 batch = next(data_iter)
144 | 
145 |             if type(batch) == dict:
146 |                 for k in batch:
147 |                     batch[k] = batch[k].to(rank, non_blocking=False)
148 |             else:
149 |                 batch = batch.to(rank, non_blocking=False)
150 | 
151 |             # Execute one training step
152 |             clear_grad_flag = (step_inner == 0)
153 |             update_param_flag = (step_inner == iter_size - 1)
154 |             if not update_param_flag:
155 |                 with score_model.no_sync():
156 |                     loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler)
157 |             else:
158 |                 loss_dict = train_step_fn(state, batch, clear_grad=clear_grad_flag, update_param=update_param_flag, gradscaler=gradscaler)
159 |             for key in loss_dict:
160 |                 tmp_loss_dict[key] += loss_dict[key].item() / iter_size
161 | 
162 |             # print(torch.cuda.memory_summary())
163 | 
164 |         if step % config.training.log_freq == 0:
165 |             loss = tmp_loss_dict['loss_total']
166 |             loss = torch.tensor(loss / world_size).to(rank)
167 | 
168 |             # logging.info("step: %d, training_loss: %.5e" % (step, tmp_loss))
169 |             dist.reduce(loss, dst=0, op=dist.ReduceOp.SUM)
170 |             if rank == 0:
171 |                 loss = loss.item()
172 |                 logging.info("step: %d, loss: %.5e, scale: %.5e" % (step, loss, gradscaler.get_scale()))
173 |                 sys.stdout.flush()
174 |                 writer.add_scalar("loss", loss, step)
175 | 
176 |         if rank == 0:
177 |             # Save a temporary checkpoint to resume training after pre-emption periodically
178 |             if step != 0 and step % config.training.snapshot_freq_for_preemption == 0:
179 |                 logging.info(f"save meta at iter {step}")
180 |                 save_checkpoint(checkpoint_meta_dir, state)
181 | 
182 |             # Save a checkpoint periodically and generate samples if needed
183 |             if step != 0 and step % config.training.snapshot_freq == 0 or step == num_train_steps:
184 |                 logging.info(f"save model: {step}-th")
185 |                 save_checkpoint(os.path.join(checkpoint_dir, f'checkpoint_{step}.pth'), state)
186 | 
187 |     dist.destroy_process_group()


--------------------------------------------------------------------------------
/GMeshDiffusion/lib/diffusion/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import os
 3 | import logging
 4 | 
 5 | 
 6 | def restore_checkpoint(ckpt_dir, state, device, strict=False, rank=None):
 7 |   if not os.path.exists(ckpt_dir):
 8 |     os.makedirs(os.path.dirname(ckpt_dir), exist_ok=True)
 9 |     logging.warning(f"No checkpoint found at {ckpt_dir}. "
10 |                     f"Returned the same state as input")
11 |     if strict:
12 |       raise
13 |     return state
14 |   else:
15 |     if rank is not None:
16 |       device = f"cuda:{rank}"
17 |     # loaded_state = torch.load(ckpt_dir, map_location=device)
18 |     loaded_state = torch.load(ckpt_dir, map_location='cpu')
19 |     state['optimizer'].load_state_dict(loaded_state['optimizer'])
20 |     try:
21 |       state['model'].load_state_dict(loaded_state['model'], strict=False)
22 |     except:
23 |       consume_prefix_in_state_dict_if_present(loaded_state['model'])
24 |       state['model'].load_state_dict(loaded_state['model'], strict=False)
25 |     state['ema'].load_state_dict(loaded_state['ema'], device=device)
26 |     state['step'] = loaded_state['step']
27 |     state['model'].to(device)
28 |     try:
29 |       state['gradscaler'].load_state_dict(loaded_state['gradscaler'])
30 |       # state['gradscaler'].to(device)
31 |     except:
32 |       # raise
33 |       pass
34 |     torch.cuda.empty_cache()
35 |     return state
36 | 
37 | 
def save_checkpoint(ckpt_dir, state):
  """Serialize the training state to the file `ckpt_dir` with `torch.save`.

  The optimizer, model, EMA tracker and gradient scaler are stored through
  their `state_dict()`; the step counter is stored as is.
  """
  payload = {}
  for name in ('optimizer', 'model', 'ema'):
    payload[name] = state[name].state_dict()
  payload['step'] = state['step']
  payload['gradscaler'] = state['gradscaler'].state_dict()
  torch.save(payload, ckpt_dir)


--------------------------------------------------------------------------------
/GMeshDiffusion/main_diffusion.py:
--------------------------------------------------------------------------------
 1 | """Training and evaluation"""
 2 | 
 3 | from absl import app
 4 | from absl import flags
 5 | from ml_collections.config_flags import config_flags
 6 | 
 7 | import lib.diffusion.trainer as trainer
 8 | import lib.diffusion.evaler as evaler
 9 | 
10 | 
# Global absl flag registry; read inside `main` once `app.run` has parsed argv.
FLAGS = flags.FLAGS

# --config: config file describing the diffusion experiment. The config is
# defined with lock_config=False, i.e. it is left unlocked.
config_flags.DEFINE_config_file(
    "config", None, "diffusion configs", lock_config=False)
# --mode: selects which entry point `main` dispatches to.
flags.DEFINE_enum("mode", None, ["train", "uncond_gen", "cond_gen", "uncond_gen_interp"], "Running mode")
# Neither flag has a default, so both must be given on the command line.
flags.mark_flags_as_required(["config", "mode"])
17 | 
18 | 
def main(argv):
    """Dispatch to training or to one of the generation routines by --mode."""
    mode = FLAGS.mode
    if mode == 'train':
        trainer.train(FLAGS.config)
        return
    if mode == 'uncond_gen':
        evaler.uncond_gen(FLAGS.config)
        return
    if mode == 'uncond_gen_interp':
        evaler.uncond_gen_interp(FLAGS.config)
        return
    if mode == 'cond_gen':
        evaler.cond_gen(FLAGS.config)


if __name__ == "__main__":
    app.run(main)
31 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/main_diffusion_ddp.py:
--------------------------------------------------------------------------------
 1 | """Training and evaluation"""
 2 | 
 3 | from absl import app
 4 | from absl import flags
 5 | from ml_collections.config_flags import config_flags
 6 | 
 7 | import lib.diffusion.trainer_ddp as trainer
 8 | import lib.diffusion.evaler as evaler
 9 | 
10 | 
11 | 
12 | 
# Global absl flag registry; read inside `main` once `app.run` has parsed argv.
FLAGS = flags.FLAGS

# --config: config file describing the diffusion experiment. The config is
# defined with lock_config=False, i.e. it is left unlocked.
config_flags.DEFINE_config_file(
    "config", None, "diffusion configs", lock_config=False)
# --mode: the flag accepts four values, but `main` in this file only acts on
# "train".
flags.DEFINE_enum("mode", None, ["train", "uncond_gen", "cond_gen", "uncond_gen_interp"], "Running mode")
# Neither flag has a default, so both must be given on the command line.
flags.mark_flags_as_required(["config", "mode"])
19 | 
def main(argv):
    """Print the parsed config, then start DDP training when --mode=train.

    The other values accepted by the --mode flag are not handled here and
    result in no further action.
    """
    print(FLAGS.config)
    if FLAGS.mode != 'train':
        return
    trainer.train(FLAGS.config)


if __name__ == "__main__":
    app.run(main)
27 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/metadata/get_splits_lower.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | 
 4 | random.seed(42)
 5 | 
 6 | split_ratio = 0.9
 7 | data_root = 'PLACEHOLDER'
 8 | grid_root = os.path.join(data_root, 'grid')
 9 | occgrid_root = os.path.join(data_root, 'grid_aug')
10 | data_path_list = sorted([os.path.join(data_root, fpath) for fpath in os.listdir(data_root)])
11 | 
12 | random.shuffle(data_path_list)
13 | 
14 | n_train = int(len(data_path_list) * split_ratio)
15 | train_list = data_path_list[:n_train]
16 | test_list = data_path_list[n_train:]
17 | 
18 | with open('lower_res64_grid_train.txt', 'w') as f:
19 |     f.write('\n'.join(train_list))
20 | 
21 | with open('lower_res64_grid_test.txt', 'w') as f:
22 |     f.write('\n'.join(test_list))
23 | 
24 | 
25 | occgrid_train_list = [os.path.join(occgrid_root, x.split('/')[-1]) for x in train_list]
26 | occgrid_test_list = [os.path.join(occgrid_root, x.split('/')[-1]) for x in test_list]
27 | 
28 | with open('lower_res64_occgrid_train.txt', 'w') as f:
29 |     f.write('\n'.join(occgrid_train_list))
30 | 
31 | with open('lower_res64_occgrid_test.txt', 'w') as f:
32 |     f.write('\n'.join(occgrid_test_list))
33 | 
34 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/metadata/get_splits_upper.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | 
 4 | random.seed(42)
 5 | 
 6 | split_ratio = 0.9
 7 | data_root = 'PLACEHOLDER'
 8 | grid_root = os.path.join(data_root, 'grid')
 9 | occgrid_root = os.path.join(data_root, 'grid_aug')
10 | data_path_list = sorted([os.path.join(data_root, fpath) for fpath in os.listdir(data_root)])
11 | 
12 | random.shuffle(data_path_list)
13 | 
14 | n_train = int(len(data_path_list) * split_ratio)
15 | train_list = data_path_list[:n_train]
16 | test_list = data_path_list[n_train:]
17 | 
18 | with open('upper_res64_grid_train.txt', 'w') as f:
19 |     f.write('\n'.join(train_list))
20 | 
21 | with open('upper_res64_grid_test.txt', 'w') as f:
22 |     f.write('\n'.join(test_list))
23 | 
24 | 
25 | occgrid_train_list = [os.path.join(occgrid_root, x.split('/')[-1]) for x in train_list]
26 | occgrid_test_list = [os.path.join(occgrid_root, x.split('/')[-1]) for x in test_list]
27 | 
28 | with open('upper_res64_occgrid_train.txt', 'w') as f:
29 |     f.write('\n'.join(occgrid_train_list))
30 | 
31 | with open('upper_res64_occgrid_test.txt', 'w') as f:
32 |     f.write('\n'.join(occgrid_test_list))
33 | 
34 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/metadata/save_tet_info.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |     Storing tet-grid related meta-info into a single file
  3 | '''
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import os
  8 | import tqdm
  9 | import argparse
 10 | 
 11 | from itertools import combinations
 12 | 
 13 | 
 14 | def tet_to_grids(vertices, values_list, grid_size):
 15 |     grid = torch.zeros(12, grid_size, grid_size, grid_size, device=vertices.device)
 16 |     with torch.no_grad():
 17 |         for k, values in enumerate(values_list):
 18 |             if k == 0:
 19 |                 grid[k, vertices[:, 0], vertices[:, 1], vertices[:, 2]] = values.squeeze()
 20 |             else:
 21 |                 grid[1:4, vertices[:, 0], vertices[:, 1], vertices[:, 2]] = values.transpose(0, 1)
 22 |     return grid
 23 | 
# Script entry point: loads a tetrahedral grid from ./tets, derives integer
# grid locations and edge-ordering metadata, and saves everything to
# 'tet_info.pt' in the current directory. Tensors are moved with .cuda(), so
# a CUDA device is required.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='nvdiffrec')
    parser.add_argument('-res', '--resolution', type=int)
    # The three options below are parsed but not read anywhere in this script.
    parser.add_argument('-r', '--root', type=str)
    parser.add_argument('-s', '--source', type=str)
    parser.add_argument('-t', '--target', type=str)
    FLAGS = parser.parse_args()

    tet_path = f'./tets/{FLAGS.resolution}_tets_cropped_reordered.npz'
    tet = np.load(tet_path)
    vertices = torch.tensor(tet['vertices']).cuda()
    indices = torch.tensor(tet['indices']).long().cuda()

    edges = torch.tensor(tet['edges']).long().cuda()
    # Flattened to one (vertex, vertex) index pair per row.
    tet_edges = torch.tensor(tet['tet_edges']).long().view(-1, 2).cuda()

    # Grid step: difference between the two smallest distinct coordinate
    # values, halved to obtain a grid twice as dense.
    vertices_unique = vertices[:].unique()
    dx = vertices_unique[1] - vertices_unique[0]
    dx = dx / 2.0 ### denser grid
    # Integer grid coordinates of every vertex, measured from the global
    # minimum coordinate and truncated by .long().
    vertices_discretized = (
        ((vertices - vertices.min()) / dx)
    ).long()

    # Edge midpoints in grid coordinates. Neither variable is used below.
    midpoints = (vertices_discretized[edges[:, 0]] + vertices_discretized[edges[:, 1]]) / 2.0
    midpoints_dicretized = midpoints.long()

    # Grid location of each tetrahedron's centre (mean of its 4 vertices).
    tet_verts = vertices_discretized[indices.view(-1)].view(-1, 4, 3)
    tet_center = tet_verts.float().mean(dim=1)
    tet_center_discretized = tet_center.long()


    # The six edges of a tetrahedron as pairs of local vertex indices.
    edge_ind_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
    # Collect every pair of edges (i < j) that share a vertex; the result is
    # later viewed as (12, 2), i.e. 12 such pairs.
    msdf_tetedges = []
    # Local vertex indices of both edges of each pair; not saved below.
    msdf_from_tetverts = []
    for i in range(5):
        for j in range(i+1, 6):
            if (edge_ind_list[i][0] == edge_ind_list[j][0]
                or edge_ind_list[i][0] == edge_ind_list[j][1]
                or edge_ind_list[i][1] == edge_ind_list[j][0]
                or edge_ind_list[i][1] == edge_ind_list[j][1]
            ):
                msdf_tetedges.append(i)
                msdf_tetedges.append(j)
                msdf_from_tetverts.extend([edge_ind_list[i][0], edge_ind_list[i][1], edge_ind_list[j][0], edge_ind_list[j][1]])
    msdf_tetedges = torch.tensor(msdf_tetedges)
    msdf_from_tetverts = torch.tensor(msdf_from_tetverts)
    print(msdf_tetedges)
    print(msdf_tetedges.size())



    tet_edges = tet_edges.view(-1, 2)
    msdf_tetedges = msdf_tetedges.view(-1)
    # Midpoint of every tetrahedron edge in grid coordinates.
    tet_edgenodes_pos = (vertices_discretized[tet_edges[:, 0]] + vertices_discretized[tet_edges[:, 1]]) / 2.0
    # NOTE(review): each midpoint above appears to have 3 coordinates and the
    # next statement views the data as (..., 2, 3); grouping by (6, 2) here
    # rather than (6, 3) looks suspicious -- confirm before relying on
    # 'occ_edge_cano_order'.
    tet_edgenodes_pos = tet_edgenodes_pos.view(-1, 6, 2)
    # Positions of the two edge nodes of each of the 12 edge pairs.
    occ_edge_pos = tet_edgenodes_pos[:, msdf_tetedges].view(-1, 12, 2, 3)
    

    # Canonical ordering of the two nodes of each pair: the per-axis sign of
    # their position difference is folded into one scalar code with weights
    # (16, 4, 1); sorting (code, -code) yields the index order of the nodes.
    edge_twopoint_order = torch.sign(occ_edge_pos[:, :, 0, :] - occ_edge_pos[:, :, 1, :])
    edge_twopoint_order_binary_code = (edge_twopoint_order * torch.tensor([16, 4, 1], device=edge_twopoint_order.device).view(1, 1, -1)).sum(dim=-1)
    edge_twopoint_order_binary_code = torch.stack([edge_twopoint_order_binary_code, -edge_twopoint_order_binary_code], dim=-1)
    _, edge_twopoint_order = edge_twopoint_order_binary_code.sort(dim=-1)

    # Reorder the identity indices (0, 1) of every pair by the sort result.
    occ_edge_cano_order = torch.arange(2).view(1, 1, 2).expand(occ_edge_pos.size(0), 12, 2).cuda()
    occ_edge_cano_order = torch.gather(
        input=occ_edge_cano_order,
        dim=-1,
        index=edge_twopoint_order
    )

    tet_edges = tet_edges.view(-1)

    # Bundle all derived tensors into a single file in the working directory.
    torch.save({
        'tet_v_pos': vertices,
        'tet_edge_vpos': vertices[tet_edges].view(-1, 2, 3),
        'tet_edge_pix_loc': vertices_discretized[tet_edges].view(-1, 2, 3),
        'tet_center_loc': tet_center_discretized,
        'msdf_edges': msdf_tetedges.view(12, 2),
        'occ_edge_cano_order': occ_edge_cano_order
    }, 'tet_info.pt')
104 | 


--------------------------------------------------------------------------------
/GMeshDiffusion/scripts/run_eval_lower_occgrid_normalized.sh:
--------------------------------------------------------------------------------
# Unconditional generation (DDIM sampling) with the lower occgrid config.
# Reads EVAL_DIR, REPO_ROOT_DIR and CKPT_PATH from the environment; the first
# positional argument is forwarded as config.eval.idx.
# Expansions are quoted so paths containing spaces or glob characters survive.
python main_diffusion.py --mode uncond_gen --config diffusion_configs/config_lower_occgrid_normalized.py \
--config.eval.eval_dir="$EVAL_DIR" \
--config.data.root_dir="$REPO_ROOT_DIR" \
--config.sampling.method=ddim \
--config.eval.ckpt_path="$CKPT_PATH" \
--config.eval.bin_size=30 \
--config.eval.idx "$1"


--------------------------------------------------------------------------------
/GMeshDiffusion/scripts/run_eval_upper_occgrid_normalized.sh:
--------------------------------------------------------------------------------
# Unconditional generation (DDIM sampling) with the upper occgrid config.
# Reads EVAL_DIR, REPO_ROOT_DIR and CKPT_PATH from the environment; the first
# positional argument is forwarded as config.eval.idx.
# Expansions are quoted so paths containing spaces or glob characters survive.
python main_diffusion.py --mode uncond_gen --config diffusion_configs/config_upper_occgrid_normalized.py \
--config.eval.eval_dir="$EVAL_DIR" \
--config.data.root_dir="$REPO_ROOT_DIR" \
--config.sampling.method=ddim \
--config.eval.ckpt_path="$CKPT_PATH" \
--config.eval.bin_size=10 \
--config.eval.idx "$1"


--------------------------------------------------------------------------------
/GMeshDiffusion/scripts/run_lower_occgrid_normalized_ddp.sh:
--------------------------------------------------------------------------------
# Single-node, 8-process DDP training with the lower occgrid config.
# Reads SAVE_DIR and REPO_ROOT_DIR from the environment.
# Expansions are quoted so paths containing spaces or glob characters survive.
torchrun --nnodes=1 --nproc_per_node=8 main_diffusion_ddp.py --mode=train --config=diffusion_configs/config_lower_occgrid_normalized.py \
--config.training.train_dir="$SAVE_DIR" --config.data.root_dir="$REPO_ROOT_DIR"


--------------------------------------------------------------------------------
/GMeshDiffusion/scripts/run_upper_occgrid_normalized_ddp.sh:
--------------------------------------------------------------------------------
# Single-node, 8-process DDP training with the upper occgrid config.
# Reads SAVE_DIR and REPO_ROOT_DIR from the environment.
# Expansions are quoted so paths containing spaces or glob characters survive.
torchrun --nnodes=1 --nproc_per_node=8 main_diffusion_ddp.py --mode=train --config=diffusion_configs/config_upper_occgrid_normalized.py \
--config.training.train_dir="$SAVE_DIR" --config.data.root_dir="$REPO_ROOT_DIR"
3 | 


--------------------------------------------------------------------------------
/assets/gshell_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzzcd001/GShell/c2f0ba9ea01a7f1499e309968d386324292e6c92/assets/gshell_logo.png


--------------------------------------------------------------------------------
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lzzcd001/GShell/c2f0ba9ea01a7f1499e309968d386324292e6c92/assets/teaser.png


--------------------------------------------------------------------------------
/configs/deepfashion_mc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [1024, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 24,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 128,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/configs/deepfashion_mc_256.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [1024, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 24,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 256,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/configs/deepfashion_mc_512.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [1024, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "validate" : false,
14 |     "lock_pos" : false,
15 |     "display": [{"latlong" : true}],
16 |     "background" : "white",
17 |     "denoiser": "bilateral",
18 |     "n_samples" : 12,
19 |     "env_scale" : 2.0,
20 |     "gshell_grid" : 512,
21 |     "validate" : true,
22 |     "laplace_scale" : 6000,
23 |     "boxscale": [1, 1, 1],
24 |     "aabb": [-1, -1, -1, 1, 1, 1]
25 | }


--------------------------------------------------------------------------------
/configs/deepfashion_mc_80.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [1024, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 24,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 80,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/configs/nerf_chair.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/nerf_synthetic/chair",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [800, 800],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "gshell_grid" : 128,
11 |     "mesh_scale" : 2.1,
12 |     "validate" : true,
13 |     "n_samples" : 8,
14 |     "denoiser" : "bilateral",
15 |     "display": [{"latlong" : true}, {"bsdf" : "kd"}, {"bsdf" : "ks"}, {"bsdf" : "normal"}],
16 |     "background" : "white",
17 |     "boxscale": [1, 1, 1],
18 |     "aabb": [-1, -1, -1, 1, 1, 1]
19 | }


--------------------------------------------------------------------------------
/configs/polycam_mc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [768, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 8,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 256,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/configs/polycam_mc_128.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [768, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 8,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 128,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/configs/polycam_mc_16samples.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ref_mesh": "data/spot/spot.obj",
 3 |     "random_textures": true,
 4 |     "iter": 5000,
 5 |     "save_interval": 100,
 6 |     "texture_res": [ 1024, 1024 ],
 7 |     "train_res": [768, 1024],
 8 |     "batch": 2,
 9 |     "learning_rate": [0.03, 0.005],
10 |     "ks_min" : [0, 0.001, 0.0],
11 |     "ks_max" : [0, 1.0, 1.0],
12 |     "envlight": "data/irrmaps/aerodynamics_workshop_2k.hdr",
13 |     "lock_pos" : false,
14 |     "display": [{"latlong" : true}],
15 |     "background" : "white",
16 |     "denoiser": "bilateral",
17 |     "n_samples" : 16,
18 |     "env_scale" : 2.0,
19 |     "gshell_grid" : 256,
20 |     "validate" : true,
21 |     "laplace_scale" : 6000,
22 |     "boxscale": [1, 1, 1],
23 |     "aabb": [-1, -1, -1, 1, 1, 1]
24 | }


--------------------------------------------------------------------------------
/data/tets/generate_tets.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import os
11 | import numpy as np
12 | 
13 | 
14 | '''
15 | This code segment shows how to use Quartet: https://github.com/crawforddoran/quartet, 
16 | to generate a tet grid 
17 | 1) Download, compile and run Quartet as described in the link above. Example usage `quartet meshes/cube.obj 0.5 cube_5.tet`
18 | 2) Run the function below to generate a file `cube_32_tet.tet`
19 | '''
20 | 
def generate_tetrahedron_grid_file(res=32, root='..'):
    """Invoke Quartet on the cube mesh to produce a tetrahedral grid file.

    Runs ``./quartet`` from ``<root>/quartet`` on ``meshes/cube.obj`` with a
    cell size of ``1 / res``, asking for ``meshes/cube_<res>_tet.tet`` and
    passing ``-s meshes/cube_boundary_<res>.obj``.

    Args:
        res: Grid resolution; the cell size handed to Quartet is ``1.0 / res``.
        root: Directory that contains the compiled ``quartet`` checkout.
    """
    frac = 1.0 / res
    # Integral resolutions are written without a fractional part so the output
    # is e.g. `cube_32_tet.tet` (the name the conversion helper in this file
    # defaults to) instead of `cube_32.000000_tet.tet`, which '%f' produced.
    res_tag = '%d' % res if float(res).is_integer() else '%f' % res
    command = 'cd %s/quartet; ' % (root) + \
                './quartet meshes/cube.obj %f meshes/cube_%s_tet.tet -s meshes/cube_boundary_%s.obj' % (frac, res_tag, res_tag)
    os.system(command)
26 | 
27 | 
28 | '''
29 | This code segment shows how to convert from a quartet .tet file to compressed npz file
30 | '''
def convert_from_quartet_to_npz(quartetfile = 'cube_32_tet.tet', npzfile = '32_tets'):
    """Convert a Quartet ``.tet`` text file into a compressed ``.npz`` archive.

    The first line of the file is treated as a header whose second and third
    space-separated fields are the vertex count and the tetrahedron count.
    The next ``numvertices`` rows are loaded as vertices and the following
    ``numtets`` rows as integer indices.

    Args:
        quartetfile: Path of the ``.tet`` file to read.
        npzfile: Output path handed to ``np.savez_compressed``; the arrays are
            stored under the keys ``vertices`` and ``indices``.
    """
    # Only the header is read through this handle; the context manager makes
    # sure it is closed before numpy re-opens the file below.
    with open(quartetfile, 'r') as tet_file:
        header = tet_file.readline()
    header_fields = header.split(" ")
    numvertices = int(header_fields[1])
    numtets     = int(header_fields[2])
    print(numvertices, numtets)

    # load vertices
    vertices = np.loadtxt(quartetfile, skiprows=1, max_rows=numvertices)
    print(vertices.shape)

    # load indices
    indices = np.loadtxt(quartetfile, dtype=int, skiprows=1+numvertices, max_rows=numtets)
    print(indices.shape)

    np.savez_compressed(npzfile, vertices=vertices, indices=indices)


--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | # and proprietary rights in and to this software, related documentation
 5 | # and any modifications thereto. Any use, reproduction, disclosure or
 6 | # distribution of this software and related documentation without an express
 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | from .dataset import Dataset
10 | from .dataset_mesh import DatasetMesh
11 | from .dataset_nerf import DatasetNERF
12 | from .dataset_llff import DatasetLLFF


--------------------------------------------------------------------------------
/dataset/dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import torch
11 | 
class Dataset(torch.utils.data.Dataset):
    """Basic dataset interface"""
    def __init__(self): 
        super().__init__()

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, itr):
        # Accepts the index so that `dataset[i]` on the base class reports
        # NotImplementedError rather than a TypeError about argument count.
        raise NotImplementedError

    def collate(self, batch):
        """Merge per-item dicts into one batch dict.

        Tensor entries are concatenated along dim 0. ``resolution`` and
        ``spp`` are taken from the first item. Optional entries (``img``,
        ``img_second``, ``invdepth``, ``invdepth_second``,
        ``envlight_transform``) are ``None`` in the result when the first
        item does not carry them or carries ``None``.

        Args:
            batch: Non-empty list of item dicts as produced by ``__getitem__``.

        Returns:
            Dict with the same keys for every call (missing optionals are None).
        """
        first = batch[0]
        iter_res, iter_spp = first['resolution'], first['spp']

        def _cat(key):
            return torch.cat([item[key] for item in batch], dim=0)

        def _cat_optional(key):
            # The presence check must look at an item dict: `key in batch`
            # would search the list of items and never match, which silently
            # discarded 'envlight_transform'.
            if first.get(key) is None:
                return None
            return _cat(key)

        return {
            'mv' : _cat('mv'),
            'mvp' : _cat('mvp'),
            'campos' : _cat('campos'),
            'resolution' : iter_res,
            'spp' : iter_spp,
            'img' : _cat_optional('img'),
            'img_second' : _cat_optional('img_second'),
            'invdepth' : _cat_optional('invdepth'),
            'invdepth_second' : _cat_optional('invdepth_second'),
            'envlight_transform': _cat_optional('envlight_transform'),
        }


--------------------------------------------------------------------------------
/dataset/dataset_deepfashion.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import os
 11 | import glob
 12 | import json
 13 | 
 14 | import torch
 15 | import numpy as np
 16 | 
 17 | from render import util
 18 | 
 19 | from .dataset import Dataset
 20 | 
 21 | import cv2 as cv
 22 | 
 23 | # This function is borrowed from IDR: https://github.com/lioryariv/idr
 24 | def load_K_Rt_from_P(filename, P=None):
 25 |     if P is None:
 26 |         lines = open(filename).read().splitlines()
 27 |         if len(lines) == 4:
 28 |             lines = lines[1:]
 29 |         lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
 30 |         P = np.asarray(lines).astype(np.float32).squeeze()
 31 | 
 32 |     out = cv.decomposeProjectionMatrix(P)
 33 |     K = out[0]
 34 |     R = out[1]
 35 |     t = out[2]
 36 | 
 37 |     K = K / K[2, 2]
 38 |     intrinsics = np.eye(4)
 39 |     intrinsics[:3, :3] = K
 40 | 
 41 | 
 42 |     pose = np.eye(4, dtype=np.float32)
 43 |     pose[:3, :3] = R.transpose()
 44 |     pose[:3, 3] = (t[:3] / t[3])[:, 0]
 45 | 
 46 |     return intrinsics, pose
 47 | 
 48 | def _load_img(path):
 49 |     img = util.load_image_raw(path)
 50 |     if img.dtype != np.float32: # LDR image
 51 |         img = torch.tensor(img / 255, dtype=torch.float32)
 52 |         img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
 53 |     else:
 54 |         img = torch.tensor(img, dtype=torch.float32)
 55 |     return img
 56 | 
 57 | 
 58 | 
 59 | class DatasetDeepFashion(Dataset):
 60 |     def __init__(self, base_dir, FLAGS, examples=None):
 61 |         self.FLAGS = FLAGS
 62 |         self.examples = examples
 63 |         self.base_dir = base_dir
 64 | 
 65 |         # Load config / transforms
 66 |         self.n_images = 72 ### hardcoded
 67 | 
 68 |         self.fovy               = np.deg2rad(60)
 69 |         self.proj_mtx = util.perspective(
 70 |             self.fovy, self.FLAGS.display_res[1] / self.FLAGS.display_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1]
 71 |         )
 72 | 
 73 | 
 74 | 
 75 |         camera_dict = np.load(os.path.join(self.base_dir, 'cameras_sphere.npz'))
 76 | 
 77 |         # world_mat is a projection matrix from world to image
 78 |         self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]
 79 |         self.scale_mats_np = []
 80 | 
 81 | 
 82 |         # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.
 83 |         self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]
 84 |         self.intrinsics_all = []
 85 |         self.pose_all = []
 86 | 
 87 |         for scale_mat, world_mat in zip(self.scale_mats_np, self.world_mats_np):
 88 |             P = world_mat @ scale_mat
 89 |             P = P[:3, :4]
 90 |             intrinsics, pose = load_K_Rt_from_P(None, P)
 91 |             self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
 92 |             self.pose_all.append(torch.from_numpy(pose).float())
 93 | 
 94 |         # Determine resolution & aspect ratio
 95 |         self.resolution = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(0))).shape[0:2]
 96 |         self.aspect = self.resolution[1] / self.resolution[0]
 97 | 
 98 |         if self.FLAGS.local_rank == 0:
 99 |             print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))
100 | 
101 |     def _parse_frame(self, idx):
102 |         # Load image data and modelview matrix
103 |         img    = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(idx)))
104 |         img[:,:,:3] = img[:,:,:3] * img[:,:,3:]
105 |         img[:,:,3] = torch.sign(img[:,:,3])
106 |         assert img.size(-1) == 4
107 | 
108 |         flip_mat = torch.tensor([
109 |             [ 1,  0,  0,  0],
110 |             [ 0, -1,  0,  0],
111 |             [ 0,  0, -1,  0],
112 |             [ 0,  0,  0,  1]
113 |         ], dtype=torch.float)
114 | 
115 |         mv = flip_mat @ torch.linalg.inv(self.pose_all[idx])
116 |         campos = torch.linalg.inv(mv)[:3, 3]
117 |         mvp = self.proj_mtx @ mv
118 | 
119 |         return img[None, ...].cuda(), mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda() # Add batch dimension
120 | 
121 |     def __len__(self):
122 |         return self.n_images if self.examples is None else self.examples
123 | 
124 |     def __getitem__(self, itr):
125 |         iter_res = self.FLAGS.train_res
126 |         
127 |         img      = []
128 | 
129 |         img, mv, mvp, campos = self._parse_frame(itr % self.n_images)
130 | 
131 |         return {
132 |             'mv' : mv,
133 |             'mvp' : mvp,
134 |             'campos' : campos,
135 |             'resolution' : iter_res,
136 |             'spp' : self.FLAGS.spp,
137 |             'img' : img
138 |         }
139 | 


--------------------------------------------------------------------------------
/dataset/dataset_deepfashion_testset.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import os
 11 | import glob
 12 | import json
 13 | 
 14 | import torch
 15 | import numpy as np
 16 | 
 17 | from render import util
 18 | 
 19 | from .dataset import Dataset
 20 | 
 21 | import cv2 as cv
 22 | 
 23 | # This function is borrowed from IDR: https://github.com/lioryariv/idr
 24 | def load_K_Rt_from_P(filename, P=None):
 25 |     if P is None:
 26 |         lines = open(filename).read().splitlines()
 27 |         if len(lines) == 4:
 28 |             lines = lines[1:]
 29 |         lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
 30 |         P = np.asarray(lines).astype(np.float32).squeeze()
 31 | 
 32 |     out = cv.decomposeProjectionMatrix(P)
 33 |     K = out[0]
 34 |     R = out[1]
 35 |     t = out[2]
 36 | 
 37 |     K = K / K[2, 2]
 38 |     intrinsics = np.eye(4)
 39 |     intrinsics[:3, :3] = K
 40 | 
 41 | 
 42 |     pose = np.eye(4, dtype=np.float32)
 43 |     pose[:3, :3] = R.transpose()
 44 |     pose[:3, 3] = (t[:3] / t[3])[:, 0]
 45 | 
 46 |     return intrinsics, pose
 47 | 
 48 | def _load_img(path):
 49 |     img = util.load_image_raw(path)
 50 |     if img.dtype != np.float32: # LDR image
 51 |         img = torch.tensor(img / 255, dtype=torch.float32)
 52 |         img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
 53 |     else:
 54 |         img = torch.tensor(img, dtype=torch.float32)
 55 |     return img
 56 | 
 57 | 
 58 | def _load_mask(path):
 59 |     img = util.load_image_raw(path)
 60 |     if img.dtype != np.float32: # LDR image
 61 |         img = torch.tensor(img / 255, dtype=torch.float32)
 62 |     else:
 63 |         img = torch.tensor(img, dtype=torch.float32)
 64 |     return img
 65 | 
 66 | 
 67 | class DatasetDeepFashionTestset(Dataset):
 68 |     def __init__(self, base_dir, FLAGS, examples=None):
 69 |         self.FLAGS = FLAGS
 70 |         self.examples = examples
 71 |         self.base_dir = base_dir
 72 | 
 73 |         # Load config / transforms
 74 |         self.n_images = 200 ### hardcoded
 75 | 
 76 | 
 77 |         proj_mtx_all = np.load(os.path.join(self.base_dir, 'proj_mtx_all.npy'))
 78 |         self.intrinsics_all = []
 79 |         self.pose_all = []
 80 | 
 81 | 
 82 |         self.fovy               = np.deg2rad(60)
 83 |         self.proj_mtx = util.perspective(
 84 |             self.fovy, self.FLAGS.display_res[1] / self.FLAGS.display_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1]
 85 |         )
 86 | 
 87 |         for i in range(proj_mtx_all.shape[0]):
 88 |             P = proj_mtx_all[i]
 89 |             P = P[:3, :4]
 90 |             intrinsics, pose = load_K_Rt_from_P(None, P)
 91 |             self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
 92 |             self.pose_all.append(torch.from_numpy(pose).float())
 93 | 
 94 |         # Determine resolution & aspect ratio
 95 |         self.resolution = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(0))).shape[0:2]
 96 |         self.aspect = self.resolution[1] / self.resolution[0]
 97 | 
 98 |         if self.FLAGS.local_rank == 0:
 99 |             print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))
100 | 
101 |     def _parse_frame(self, idx):
102 |         # Load image data and modelview matrix
103 |         img    = _load_img(os.path.join(self.base_dir, '{:03d}.png'.format(idx)))
104 |         assert img.size(-1) == 4
105 | 
106 |         flip_mat = torch.tensor([
107 |             [ 1,  0,  0,  0],
108 |             [ 0, -1,  0,  0],
109 |             [ 0,  0, -1,  0],
110 |             [ 0,  0,  0,  1]
111 |         ], dtype=torch.float)
112 | 
113 |         mv = flip_mat @ torch.linalg.inv(self.pose_all[idx])
114 |         campos = torch.linalg.inv(mv)[:3, 3]
115 |         mvp = self.proj_mtx @ mv
116 | 
117 |         return img[None, ...].cuda(), mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda() # Add batch dimension
118 | 
119 |     def __len__(self):
120 |         return self.n_images if self.examples is None else self.examples
121 | 
122 |     def __getitem__(self, itr):
123 |         iter_res = self.FLAGS.train_res
124 |         
125 |         img      = []
126 | 
127 |         img, mv, mvp, campos = self._parse_frame(itr % self.n_images)
128 |         
129 | 
130 |         return {
131 |             'mv' : mv,
132 |             'mvp' : mvp,
133 |             'campos' : campos,
134 |             'resolution' : iter_res,
135 |             'spp' : self.FLAGS.spp,
136 |             'img' : img
137 |         }
138 | 


--------------------------------------------------------------------------------
/dataset/dataset_llff.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import os
 11 | import glob
 12 | 
 13 | import torch
 14 | import numpy as np
 15 | 
 16 | from render import util
 17 | 
 18 | from .dataset import Dataset
 19 | 
 20 | def _load_mask(fn):
 21 |     img = torch.tensor(util.load_image(fn), dtype=torch.float32)
 22 |     if len(img.shape) == 2:
 23 |         img = img[..., None].repeat(1, 1, 3)
 24 |     return img
 25 | 
 26 | def _load_img(fn):
 27 |     img = util.load_image_raw(fn)
 28 |     if img.dtype != np.float32: # LDR image
 29 |         img = torch.tensor(img / 255, dtype=torch.float32)
 30 |         img[..., 0:3] = util.srgb_to_rgb(img[..., 0:3])
 31 |     else:
 32 |         img = torch.tensor(img, dtype=torch.float32)
 33 |     return img
 34 | 
 35 | ###############################################################################
 36 | # LLFF datasets (real world camera lightfields)
 37 | ###############################################################################
 38 | 
 39 | class DatasetLLFF(Dataset):
 40 |     def __init__(self, base_dir, FLAGS, examples=None):
 41 |         self.FLAGS = FLAGS
 42 |         self.base_dir = base_dir
 43 |         self.examples = examples
 44 | 
 45 |         # Enumerate all image files and get resolution
 46 |         all_img = [f for f in sorted(glob.glob(os.path.join(self.base_dir, "images", "*"))) if f.lower().endswith('png') or f.lower().endswith('jpg') or f.lower().endswith('jpeg')]
 47 |         self.resolution = _load_img(all_img[0]).shape[0:2]
 48 | 
 49 |         # Load camera poses
 50 |         poses_bounds = np.load(os.path.join(self.base_dir, 'poses_bounds.npy'))
 51 |         
 52 |         poses        = poses_bounds[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
 53 |         poses        = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1) # Taken from nerf, swizzles from LLFF to expected coordinate system
 54 |         poses        = np.moveaxis(poses, -1, 0).astype(np.float32)
 55 |         
 56 |         lcol         = np.array([0,0,0,1], dtype=np.float32)[None, None, :].repeat(poses.shape[0], 0)
 57 |         self.imvs    = torch.tensor(np.concatenate((poses[:, :, 0:4], lcol), axis=1), dtype=torch.float32)
 58 |         self.aspect  = self.resolution[1] / self.resolution[0] # width / height
 59 |         self.fovy    = util.focal_length_to_fovy(poses[:, 2, 4], poses[:, 0, 4])
 60 | 
 61 |         # Recenter scene so lookat position is origin
 62 |         center                = util.lines_focal(self.imvs[..., :3, 3], -self.imvs[..., :3, 2])
 63 |         self.imvs[..., :3, 3] = self.imvs[..., :3, 3] - center[None, ...]
 64 | 
 65 |         if self.FLAGS.local_rank == 0:
 66 |             print("DatasetLLFF: %d images with shape [%d, %d]" % (len(all_img), self.resolution[0], self.resolution[1]))
 67 |             print("DatasetLLFF: auto-centering at %s" % (center.cpu().numpy()))
 68 | 
 69 |         # Pre-load from disc to avoid slow png parsing
 70 |         if self.FLAGS.pre_load:
 71 |             self.preloaded_data = []
 72 |             for i in range(self.imvs.shape[0]):
 73 |                 self.preloaded_data += [self._parse_frame(i)]
 74 | 
 75 |     def _parse_frame(self, idx):
 76 |         all_img  = [f for f in sorted(glob.glob(os.path.join(self.base_dir, "images", "*"))) if f.lower().endswith('png') or f.lower().endswith('jpg') or f.lower().endswith('jpeg')]
 77 |         all_mask = [f for f in sorted(glob.glob(os.path.join(self.base_dir, "masks", "*"))) if f.lower().endswith('png') or f.lower().endswith('jpg') or f.lower().endswith('jpeg')]
 78 |         assert len(all_img) == self.imvs.shape[0] and len(all_mask) == self.imvs.shape[0]
 79 | 
 80 |         # Load image+mask data
 81 |         img  = _load_img(all_img[idx])
 82 |         mask = _load_mask(all_mask[idx])
 83 |         img  = torch.cat((img, mask[..., 0:1]), dim=-1)
 84 | 
 85 |         # Setup transforms
 86 |         proj   = util.perspective(self.fovy[idx, ...], self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])
 87 |         mv     = torch.linalg.inv(self.imvs[idx, ...])
 88 |         campos = torch.linalg.inv(mv)[:3, 3]
 89 |         mvp    = proj @ mv
 90 | 
 91 |         return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...] # Add batch dimension
 92 | 
 93 |     def __len__(self):
 94 |         return self.imvs.shape[0] if self.examples is None else self.examples
 95 | 
 96 |     def __getitem__(self, itr):
 97 |         if self.FLAGS.pre_load:
 98 |             img, mv, mvp, campos = self.preloaded_data[itr % self.imvs.shape[0]]
 99 |         else:
100 |             img, mv, mvp, campos = self._parse_frame(itr % self.imvs.shape[0])
101 | 
102 |         return {
103 |             'mv' : mv,
104 |             'mvp' : mvp,
105 |             'campos' : campos,
106 |             'resolution' : self.resolution,
107 |             'spp' : self.FLAGS.spp,
108 |             'img' : img
109 |         }
110 | 


--------------------------------------------------------------------------------
/dataset/dataset_mesh.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import numpy as np
 11 | import torch
 12 | 
 13 | from render import util
 14 | from render import mesh
 15 | from render import render
 16 | from render import light
 17 | 
 18 | from .dataset import Dataset
 19 | 
 20 | ###############################################################################
 21 | # Reference dataset using mesh & rendering
 22 | ###############################################################################
 23 | 
 24 | class DatasetMesh(Dataset):
 25 | 
 26 |     def __init__(self, ref_mesh, glctx, cam_radius, FLAGS, validate=False, mesh_center=None):
 27 |         # Init 
 28 |         self.glctx              = glctx
 29 |         self.cam_radius         = cam_radius
 30 |         self.FLAGS              = FLAGS
 31 |         self.validate           = validate
 32 |         self.fovy               = np.deg2rad(45)
 33 |         self.aspect             = FLAGS.train_res[1] / FLAGS.train_res[0]
 34 |         self.random_lgt         = FLAGS.random_lgt
 35 |         self.camera_lgt         = False
 36 | 
 37 |         self.mesh_center = mesh_center
 38 | 
 39 |         if self.FLAGS.local_rank == 0:
 40 |             print("DatasetMesh: ref mesh has %d triangles and %d vertices" % (ref_mesh.t_pos_idx.shape[0], ref_mesh.v_pos.shape[0]))
 41 | 
 42 |         # Sanity test training texture resolution
 43 |         ref_texture_res = np.maximum(ref_mesh.material['kd'].getRes(), ref_mesh.material['ks'].getRes())
 44 |         if 'normal' in ref_mesh.material:
 45 |             ref_texture_res = np.maximum(ref_texture_res, ref_mesh.material['normal'].getRes())
 46 |         if self.FLAGS.local_rank == 0 and FLAGS.texture_res[0] < ref_texture_res[0] or FLAGS.texture_res[1] < ref_texture_res[1]:
 47 |             print("---> WARNING: Picked a texture resolution lower than the reference mesh [%d, %d] < [%d, %d]" % (FLAGS.texture_res[0], FLAGS.texture_res[1], ref_texture_res[0], ref_texture_res[1]))
 48 | 
 49 |         # Load environment map texture
 50 |         self.envlight = light.load_env(FLAGS.envmap, scale=FLAGS.env_scale)
 51 |         
 52 |         self.ref_mesh = mesh.compute_tangents(ref_mesh)
 53 | 
 54 |     def _rotate_scene(self, itr):
 55 |         proj_mtx = util.perspective(self.fovy, self.FLAGS.display_res[1] / self.FLAGS.display_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])
 56 | 
 57 |         # Smooth rotation for display.
 58 |         ang    = (itr / 50) * np.pi * 2
 59 |         mv     = util.translate(0, 0, -self.cam_radius) @ (util.rotate_x(-0.4) @ util.rotate_y(ang))
 60 |         mvp    = proj_mtx @ mv
 61 |         campos = torch.linalg.inv(mv)[:3, 3]
 62 | 
 63 |         return mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda(), self.FLAGS.display_res, self.FLAGS.spp
 64 | 
 65 |     def _random_scene(self):
 66 |         # ==============================================================================================
 67 |         #  Setup projection matrix
 68 |         # ==============================================================================================
 69 |         iter_res = self.FLAGS.train_res
 70 |         proj_mtx = util.perspective(self.fovy, iter_res[1] / iter_res[0], self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])
 71 | 
 72 |         # ==============================================================================================
 73 |         #  Random camera & light position
 74 |         # ==============================================================================================
 75 | 
 76 |         # Random rotation/translation matrix for optimization.
 77 |         if self.mesh_center is not None:
 78 |             mv     = (
 79 |                 util.translate(-self.mesh_center[0], -self.mesh_center[1], -self.mesh_center[2]-self.cam_radius) 
 80 |                 @ util.random_rotation_translation(0.25)
 81 |             )
 82 |         else:
 83 |             mv     = util.translate(0, 0, -self.cam_radius) @ util.random_rotation_translation(0.25)
 84 |         mvp    = proj_mtx @ mv
 85 |         campos = torch.linalg.inv(mv)[:3, 3]
 86 | 
 87 |         return mv[None, ...].cuda(), mvp[None, ...].cuda(), campos[None, ...].cuda(), iter_res, self.FLAGS.spp # Add batch dimension
 88 | 
 89 |     def __len__(self):
 90 |         return 50 if self.validate else (self.FLAGS.iter + 1) * self.FLAGS.batch
 91 | 
 92 |     def __getitem__(self, itr):
 93 |         # ==============================================================================================
 94 |         #  Randomize scene parameters
 95 |         # ==============================================================================================
 96 | 
 97 |         if self.validate:
 98 |             mv, mvp, campos, iter_res, iter_spp = self._rotate_scene(itr)
 99 |             camera_mv = None
100 |         else:
101 |             mv, mvp, campos, iter_res, iter_spp = self._random_scene()
102 |             if self.random_lgt:
103 |                 rnd_rot = util.random_rotation()
104 |                 camera_mv = rnd_rot.unsqueeze(0).clone()
105 |             elif self.camera_lgt:
106 |                 camera_mv = mv.clone()
107 |             else:
108 |                 camera_mv = None
109 | 
110 |         with torch.no_grad():
111 |             rendered = render.render_mesh(self.glctx, self.ref_mesh, mvp, campos, self.envlight, iter_res, spp=iter_spp, 
112 |                                     num_layers=self.FLAGS.layers, msaa=True, background=None, shade_data=True)
113 |         return {
114 |             'mv' : mv,
115 |             'mvp' : mvp,
116 |             'campos' : campos,
117 |             'resolution' : iter_res,
118 |             'spp' : iter_spp,
119 |             'img' : rendered['shaded'],
120 |             'img_second' : rendered['shaded_second'],
121 |             'invdepth' : rendered['invdepth'],
122 |             'invdepth_second' : rendered['invdepth_second'],
123 |             'envlight_transform': camera_mv
124 |         }
125 | 


--------------------------------------------------------------------------------
/dataset/dataset_nerf.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import os
11 | import glob
12 | import json
13 | 
14 | import torch
15 | import numpy as np
16 | 
17 | from render import util
18 | 
19 | from .dataset import Dataset
20 | 
21 | ###############################################################################
22 | # NERF image based dataset (synthetic)
23 | ###############################################################################
24 | 
def _load_img(path):
    """Load the first file matching `path + '.*'` as a float32 tensor.

    Images whose raw dtype is not float32 are treated as LDR: they are divided
    by 255 and their first three channels are passed through
    `util.srgb_to_rgb`. float32 images are returned unchanged in value.

    Raises:
        AssertionError: if no file matches the pattern.
    """
    candidates = glob.glob(path + '.*')
    assert len(candidates) > 0, "Tried to find image file for: %s, but found 0 files" % (path)
    raw = util.load_image_raw(candidates[0])

    if raw.dtype == np.float32:
        return torch.tensor(raw, dtype=torch.float32)

    # LDR image
    ldr = torch.tensor(raw / 255, dtype=torch.float32)
    ldr[..., 0:3] = util.srgb_to_rgb(ldr[..., 0:3])
    return ldr
35 | 
class DatasetNERF(Dataset):
    """Image dataset driven by a NeRF-style transforms JSON file.

    The config is expected to provide a top-level 'camera_angle_x' and a
    'frames' list whose entries carry 'file_path' (relative to the config's
    directory, without extension) and 'transform_matrix'.

    Args:
        cfg_path: path to the JSON config.
        FLAGS: run options; this class reads local_rank, pre_load,
            cam_near_far, train_res and spp.
        examples: optional override for the reported dataset length; indices
            wrap around the real number of images.
    """

    def __init__(self, cfg_path, FLAGS, examples=None):
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = os.path.dirname(cfg_path)

        # Load config / transforms (context manager so the handle is closed)
        with open(cfg_path, 'r') as cfg_file:
            self.cfg = json.load(cfg_file)
        self.n_images = len(self.cfg['frames'])

        # Determine resolution & aspect ratio from the first frame
        self.resolution = _load_img(os.path.join(self.base_dir, self.cfg['frames'][0]['file_path'])).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

        # Pre-load from disc to avoid slow png parsing
        if self.FLAGS.pre_load:
            self.preloaded_data = [self._parse_frame(self.cfg, i) for i in range(self.n_images)]

    def _parse_frame(self, cfg, idx):
        """Return (img, mv, mvp, campos) for frame `idx`, each with a batch dimension."""
        # Config projection matrix (static, so could be precomputed)
        fovy   = util.fovx_to_fovy(cfg['camera_angle_x'], self.aspect)
        proj   = util.perspective(fovy, self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # Load image data and modelview matrix
        frame  = cfg['frames'][idx]
        img    = _load_img(os.path.join(self.base_dir, frame['file_path']))
        # The stored transform is inverted to obtain the model-view matrix,
        # then combined with util.rotate_x(-pi/2).
        mv     = torch.linalg.inv(torch.tensor(frame['transform_matrix'], dtype=torch.float32))
        mv     = mv @ util.rotate_x(-np.pi / 2)
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp    = proj @ mv

        return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...] # Add batch dimension

    def __len__(self):
        """Number of images, or `examples` when that override was given."""
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for index `itr` (wrapped modulo the image count)."""
        frame_idx = itr % self.n_images

        if self.FLAGS.pre_load:
            img, mv, mvp, campos = self.preloaded_data[frame_idx]
        else:
            img, mv, mvp, campos = self._parse_frame(self.cfg, frame_idx)

        return {
            'mv' : mv,
            'mvp' : mvp,
            'campos' : campos,
            'resolution' : self.FLAGS.train_res,
            'spp' : self.FLAGS.spp,
            'img' : img
        }
95 | 


--------------------------------------------------------------------------------
/dataset/dataset_nerf_colmap.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import os
11 | import glob
12 | import json
13 | 
14 | import torch
15 | import numpy as np
16 | 
17 | from render import util
18 | 
19 | from .dataset import Dataset
20 | 
21 | ###############################################################################
22 | # NERF image based dataset (synthetic)
23 | ###############################################################################
24 | 
def _load_img(path):
    """Load the image at `path` as a float32 tensor.

    Images whose raw dtype is not float32 are treated as LDR: they are divided
    by 255 and their first three channels are passed through
    `util.srgb_to_rgb`. float32 images are returned unchanged in value.
    """
    raw = util.load_image_raw(path)

    if raw.dtype == np.float32:
        return torch.tensor(raw, dtype=torch.float32)

    # LDR image
    ldr = torch.tensor(raw / 255, dtype=torch.float32)
    ldr[..., 0:3] = util.srgb_to_rgb(ldr[..., 0:3])
    return ldr
33 | 
class DatasetNERF(Dataset):
    """Image + mask dataset driven by a NeRF-style transforms JSON file.

    Each entry of cfg['frames'] is expected to carry its own
    'camera_angle_x', a 'file_path' (relative to the config's directory) and
    a 'transform_matrix'. The mask path is derived from the image path by
    replacing '/image/' with '/mask/' and '.jpg' with '.png'.

    Args:
        cfg_path: path to the JSON config.
        FLAGS: run options; this class reads local_rank, pre_load,
            cam_near_far, train_res and spp.
        examples: optional override for the reported dataset length; indices
            wrap around the real number of images.
    """

    def __init__(self, cfg_path, FLAGS, examples=None):
        self.FLAGS = FLAGS
        self.examples = examples
        self.base_dir = os.path.dirname(cfg_path)

        # Load config / transforms (context manager so the handle is closed)
        with open(cfg_path, 'r') as cfg_file:
            self.cfg = json.load(cfg_file)
        self.n_images = len(self.cfg['frames'])

        # Determine resolution & aspect ratio from the first frame
        self.resolution = _load_img(os.path.join(self.base_dir, self.cfg['frames'][0]['file_path'])).shape[0:2]
        self.aspect = self.resolution[1] / self.resolution[0]

        if self.FLAGS.local_rank == 0:
            print("DatasetNERF: %d images with shape [%d, %d]" % (self.n_images, self.resolution[0], self.resolution[1]))

        # Pre-load from disc to avoid slow png parsing
        if self.FLAGS.pre_load:
            self.preloaded_data = [self._parse_frame(self.cfg, i) for i in range(self.n_images)]

    def _parse_frame(self, cfg, idx):
        """Return (img, mv, mvp, campos) for frame `idx`, each with a batch dimension.

        The first channel of the mask image is appended to the image as an
        extra trailing channel.
        """
        frame  = cfg['frames'][idx]

        # Per-frame projection matrix
        fovy   = util.fovx_to_fovy(frame['camera_angle_x'], self.aspect)
        proj   = util.perspective(fovy, self.aspect, self.FLAGS.cam_near_far[0], self.FLAGS.cam_near_far[1])

        # Load image data, mask and modelview matrix
        img_path = os.path.join(self.base_dir, frame['file_path'])
        img    = _load_img(img_path)
        mask   = _load_img(img_path.replace('/image/', '/mask/').replace('.jpg', '.png'))
        img    = torch.cat([img, mask[:,:,:1]], dim=-1)
        # The stored transform is inverted to obtain the model-view matrix,
        # then combined with util.rotate_x(-pi/2).
        mv     = torch.linalg.inv(torch.tensor(frame['transform_matrix'], dtype=torch.float32))
        mv     = mv @ util.rotate_x(-np.pi / 2)
        campos = torch.linalg.inv(mv)[:3, 3]
        mvp    = proj @ mv

        return img[None, ...], mv[None, ...], mvp[None, ...], campos[None, ...] # Add batch dimension

    def __len__(self):
        """Number of images, or `examples` when that override was given."""
        return self.n_images if self.examples is None else self.examples

    def __getitem__(self, itr):
        """Return the sample dict for index `itr` (wrapped modulo the image count)."""
        frame_idx = itr % self.n_images

        if self.FLAGS.pre_load:
            img, mv, mvp, campos = self.preloaded_data[frame_idx]
        else:
            img, mv, mvp, campos = self._parse_frame(self.cfg, frame_idx)

        return {
            'mv' : mv,
            'mvp' : mvp,
            'campos' : campos,
            'resolution' : self.FLAGS.train_res,
            'spp' : self.FLAGS.spp,
            'img' : img
        }
95 | 


--------------------------------------------------------------------------------
/denoiser/denoiser.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import torch
 4 | import numpy as np
 5 | import math
 6 | 
 7 | from render import util
 8 | if "TWOSIDED_TEXTURE" not in os.environ or os.environ["TWOSIDED_TEXTURE"] == "True":
 9 | 	from render import optixutils as ou
10 | else:
11 | 	from render import optixutils_single_sided as ou
12 | 
13 | 
14 | ###############################################################################
15 | # Bilateral denoiser
16 | #
17 | # Loosely based on SVGF, but removing temporal components and variance stopping guides.
18 | # https://research.nvidia.com/publication/2017-07_spatiotemporal-variance-guided-filtering-real-time-reconstruction-path-traced
19 | ###############################################################################
20 | 
class BilateralDenoiser(torch.nn.Module):
	"""Thin module wrapper around `ou.bilateral_denoiser`.

	The filter strength is controlled by `sigma`, derived from the
	`influence` factor passed to the constructor or `set_influence`.
	"""

	def __init__(self, influence=1.0):
		super().__init__()
		self.set_influence(influence)

	def set_influence(self, factor):
		"""Set sigma to twice `factor` (floored at 0.0001) and refresh derived values.

		Also stores `variance` (sigma squared) and `N`, computed as
		2 * ceil(2.5 * sigma) + 1.
		"""
		sigma = max(factor * 2, 0.0001)
		self.sigma = sigma
		self.variance = sigma**2.
		self.N = 2 * math.ceil(sigma * 2.5) + 1

	def forward(self, input):
		"""Denoise `input`, whose last axis is split as color [0:3], normal [3:6], zdz [6:8]."""
		color = input[..., 0:3]
		# Bent normals can produce normals of length < 1 here
		normal = util.safe_normalize(input[..., 3:6])
		depth_terms = input[..., 6:8]
		return ou.bilateral_denoiser(color, normal, depth_terms, self.sigma)
36 | 


--------------------------------------------------------------------------------
/geometry/embedding.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | 
 4 | class Embedding(nn.Module):
 5 |     def __init__(self, in_channels, N_freqs, logscale=True):
 6 |         """
 7 |         Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
 8 |         in_channels: number of input channels (3 for both xyz and direction)
 9 |         """
10 |         super(Embedding, self).__init__()
11 |         self.N_freqs = N_freqs
12 |         self.in_channels = in_channels
13 |         self.funcs = [torch.sin, torch.cos]
14 |         self.out_channels = in_channels*(len(self.funcs)*N_freqs+1)
15 | 
16 |         if logscale:
17 |             self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs)
18 |         else:
19 |             self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs)
20 | 
21 |     def forward(self, x):
22 |         """
23 |         Embeds x to (x, sin(2^k x), cos(2^k x), ...) 
24 |         Different from the paper, "x" is also in the output
25 |         See https://github.com/bmild/nerf/issues/12
26 | 
27 |         Inputs:
28 |             x: (B, self.in_channels)
29 | 
30 |         Outputs:
31 |             out: (B, self.out_channels)
32 |         """
33 |         out = [x]
34 |         for freq in self.freq_bands:
35 |             for func in self.funcs:
36 |                 out += [func(freq*x)]
37 | 
38 |         return torch.cat(out, -1)
39 | 
40 | 


--------------------------------------------------------------------------------
/geometry/mlp.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import numpy as np
 4 | 
 5 | from .embedding import Embedding
 6 | 
 7 | class MLP(nn.Module):
 8 |     def __init__(self, n_freq=6, d_hidden=128, d_out=1, n_hidden=3, skip_in=[], use_float16=False):
 9 |         super().__init__()
10 |         self.emb = Embedding(3, n_freq)
11 |         layers = [
12 |             nn.Linear(self.emb.out_channels, d_hidden),
13 |             nn.Softplus(beta=100)
14 |         ]
15 |         count = 2
16 |         self.skip_count = []
17 |         self.skip_in = skip_in
18 |         for i in range(n_hidden):
19 |             if i in skip_in:
20 |                 layers.append(nn.Linear(d_hidden + self.emb.out_channels, d_hidden))
21 |                 self.skip_count.append(count)
22 |             else:
23 |                 layers.append(nn.Linear(d_hidden, d_hidden))
24 |             count += 1
25 |             layers.append(nn.Softplus(beta=100))
26 |             count += 1
27 |         layers.append(nn.Linear(d_hidden, d_out))
28 |         count += 1
29 |         self.net = nn.ModuleList(layers)
30 |         self.use_float16 = use_float16
31 |     
32 |     def forward(self, x):
33 |         emb = self.emb(x)
34 |         x = emb
35 |         with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16):
36 |             for i, module in enumerate(self.net):
37 |                 if i in self.skip_count:
38 |                     x = module(torch.cat([x, emb], dim=-1))
39 |                 else:
40 |                     x = module(x)
41 |         return x


--------------------------------------------------------------------------------
/render/light.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
  4 | # and proprietary rights in and to this software, related documentation
  5 | # and any modifications thereto. Any use, reproduction, disclosure or
  6 | # distribution of this software and related documentation without an express
  7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
  8 | 
  9 | import os
 10 | import numpy as np
 11 | import torch
 12 | import nvdiffrast.torch as dr
 13 | 
 14 | from . import util
 15 | from . import renderutils as ru
 16 | 
 17 | ######################################################################################
 18 | # Monte-carlo sampled environment light with PDF / CDF computation
 19 | ######################################################################################
 20 | 
 21 | class EnvironmentLight:
 22 |     LIGHT_MIN_RES = 16
 23 | 
 24 |     MIN_ROUGHNESS = 0.08
 25 |     MAX_ROUGHNESS = 0.5
 26 | 
 27 |     def __init__(self, base):
 28 |         self.mtx = None
 29 |         self.base = base
 30 | 
 31 |         self.pdf_scale = (self.base.shape[0] * self.base.shape[1]) / (2 * np.pi * np.pi)
 32 |         self.update_pdf()
 33 | 
 34 |     def xfm(self, mtx):
 35 |         self.mtx = mtx
 36 | 
 37 |     def parameters(self):
 38 |         return [self.base]
 39 | 
 40 |     def clone(self):
 41 |         return EnvironmentLight(self.base.clone().detach())
 42 | 
 43 |     def clamp_(self, min=None, max=None):
 44 |         self.base.clamp_(min, max)
 45 | 
 46 |     def update_pdf(self):
 47 |         with torch.no_grad():
 48 |             # Compute PDF
 49 |             Y = util.pixel_grid(self.base.shape[1], self.base.shape[0])[..., 1]
 50 |             self._pdf = torch.max(self.base, dim=-1)[0] * torch.sin(Y * np.pi) # Scale by sin(theta) for lat-long, https://cs184.eecs.berkeley.edu/sp18/article/25
 51 |             self._pdf = self._pdf / torch.sum(self._pdf)
 52 | 
 53 |             # Compute cumulative sums over the columns and rows
 54 |             self.cols = torch.cumsum(self._pdf, dim=1)
 55 |             self.rows = torch.cumsum(self.cols[:, -1:].repeat([1, self.cols.shape[1]]), dim=0)
 56 | 
 57 |             # Normalize
 58 |             self.cols = self.cols / torch.where(self.cols[:, -1:] > 0, self.cols[:, -1:], torch.ones_like(self.cols))
 59 |             self.rows = self.rows / torch.where(self.rows[-1:, :] > 0, self.rows[-1:, :], torch.ones_like(self.rows))
 60 | 
 61 |     @torch.no_grad()
 62 |     def generate_image(self, res):
 63 |         texcoord = util.pixel_grid(res[1], res[0])
 64 |         return dr.texture(self.base[None, ...].contiguous(), texcoord[None, ...].contiguous(), filter_mode='linear')[0]
 65 | 
 66 | ######################################################################################
 67 | # Load and store
 68 | ######################################################################################
 69 | 
 70 | @torch.no_grad()
 71 | def _load_env_hdr(fn, scale=1.0, res=None, trainable=False):
 72 |     latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')*scale
 73 | 
 74 |     if res is not None:
 75 |         texcoord = util.pixel_grid(res[1], res[0])
 76 |         latlong_img = torch.clamp(dr.texture(latlong_img[None, ...], texcoord[None, ...], filter_mode='linear')[0], min=0.0001)
 77 | 
 78 |     print("EnvProbe,", latlong_img.shape, ", min/max", torch.min(latlong_img).item(), torch.max(latlong_img).item())
 79 |     if trainable:
 80 |         print("trainable light loaded")
 81 |         return EnvironmentLight(base=latlong_img.clone().detach().requires_grad_(True))
 82 |     else:
 83 |         return EnvironmentLight(base=latlong_img)
 84 | 
 85 | @torch.no_grad()
 86 | def load_env(fn, scale=1.0, res=None, trainable=False):
 87 |     if os.path.splitext(fn)[1].lower() == ".hdr":
 88 |         return _load_env_hdr(fn, scale, res, trainable=trainable)
 89 |     else:
 90 |         assert False, "Unknown envlight extension %s" % os.path.splitext(fn)[1]
 91 | 
 92 | @torch.no_grad()
 93 | def save_env_map(fn, light):
 94 |     assert isinstance(light, EnvironmentLight)
 95 |     color = light.generate_image([512, 1024])
 96 |     util.save_image_raw(fn, color.detach().cpu().numpy())
 97 | 
 98 | ######################################################################################
 99 | # Create trainable with random initialization
100 | ######################################################################################
101 | 
def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
    """Create a trainable square environment light with random initialization.

    The (base_res, base_res, 3) base image is drawn uniformly from [0, 1) on
    CUDA, then multiplied by `scale` and offset by `bias`.
    """
    noise = torch.rand(base_res, base_res, 3, dtype=torch.float32, device='cuda')
    initial = noise * scale + bias
    return EnvironmentLight(initial.clone().detach().requires_grad_(True))
106 |     


--------------------------------------------------------------------------------
/render/material.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
  4 | # and proprietary rights in and to this software, related documentation
  5 | # and any modifications thereto. Any use, reproduction, disclosure or
  6 | # distribution of this software and related documentation without an express
  7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
  8 | 
  9 | import os
 10 | import numpy as np
 11 | import torch
 12 | 
 13 | from . import util
 14 | from . import texture
 15 | from . import mlptexture
 16 | 
 17 | ######################################################################################
 18 | # .mtl material format loading / storing
 19 | ######################################################################################
 20 | 
 21 | def load_mtl(fn, clear_ks=True):
 22 |     import re
 23 |     mtl_path = os.path.dirname(fn)
 24 | 
 25 |     # Read file
 26 |     with open(fn, 'r') as f:
 27 |         lines = f.readlines()
 28 | 
 29 |     # Parse materials
 30 |     materials = []
 31 |     for line in lines:
 32 |         split_line = re.split(' +|\t+|\n+', line.strip())
 33 |         prefix = split_line[0].lower()
 34 |         data = split_line[1:]
 35 |         if 'newmtl' in prefix:
 36 |             material = {'name' : data[0]}
 37 |             materials += [material]
 38 |         elif materials:
 39 |             if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
 40 |                 material[prefix] = data[0]
 41 |             else:
 42 |                 material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')
 43 | 
 44 |     # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps. So replace constants with 1x1 maps
 45 |     for mat in materials:
 46 |         if not 'bsdf' in mat:
 47 |             mat['bsdf'] = 'pbr'
 48 | 
 49 |         if 'map_kd' in mat:
 50 |             mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
 51 |         else:
 52 |             mat['kd'] = texture.Texture2D(mat['kd'])
 53 |         
 54 |         if 'map_ks' in mat:
 55 |             mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
 56 |         else:
 57 |             mat['ks'] = texture.Texture2D(mat['ks'])
 58 | 
 59 |         if 'bump' in mat:
 60 |             mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)
 61 | 
 62 |         # Convert Kd from sRGB to linear RGB
 63 |         mat['kd'] = texture.srgb_to_rgb(mat['kd'])
 64 | 
 65 |         if clear_ks:
 66 |             # Override ORM occlusion (red) channel by zeros. We hijack this channel
 67 |             for mip in mat['ks'].getMips():
 68 |                 mip[..., 0] = 0.0 
 69 | 
 70 |     return materials
 71 | 
 72 | def save_mtl(fn, material):
 73 |     folder = os.path.dirname(fn)
 74 |     with open(fn, "w") as f:
 75 |         f.write('newmtl defaultMat\n')
 76 |         if material is not None:
 77 |             f.write('bsdf   %s\n' % material['bsdf'])
 78 |             if 'kd' in material.keys():
 79 |                 f.write('map_Kd texture_kd.png\n')
 80 |                 texture.save_texture2D(os.path.join(folder, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
 81 |             if 'ks' in material.keys():
 82 |                 f.write('map_Ks texture_ks.png\n')
 83 |                 texture.save_texture2D(os.path.join(folder, 'texture_ks.png'), material['ks'])
 84 |             if 'normal' in material.keys():
 85 |                 f.write('bump texture_n.png\n')
 86 |                 texture.save_texture2D(os.path.join(folder, 'texture_n.png'), material['normal'], lambda_fn=lambda x:(util.safe_normalize(x)+1)*0.5)
 87 |         else:
 88 |             f.write('Kd 1 1 1\n')
 89 |             f.write('Ks 0 0 0\n')
 90 |             f.write('Ka 0 0 0\n')
 91 |             f.write('Tf 1 1 1\n')
 92 |             f.write('Ni 1\n')
 93 |             f.write('Ns 0\n')
 94 | 
 95 | ######################################################################################
 96 | # Utility function to convert an existing material and make all textures trainable
 97 | ######################################################################################
 98 | 
 99 | def create_trainable(material):
100 |     result = material.copy()
101 |     for key, val in result.items():
102 |         if isinstance(val, texture.Texture2D):
103 |             result[key] = texture.create_trainable(val)
104 |     return result
105 | 
def get_parameters(material):
    """Collect the parameters of every Texture2D / MLPTexture3D in `material`.

    Returns a flat list, in the material's iteration order.
    """
    collected = []
    for val in material.values():
        if isinstance(val, texture.Texture2D) or isinstance(val, mlptexture.MLPTexture3D):
            collected.extend(val.parameters())
    return collected
112 | 
113 | ######################################################################################
114 | # Merge multiple materials into a single uber-material
115 | ######################################################################################
116 | 
def _upscale_replicate(x, full_res):
    """Grow an NHWC tensor to `full_res` = (height, width) by edge replication.

    Padding is added only on the bottom and right sides; the result is
    returned as a contiguous NHWC tensor.
    """
    # Padding operates on the last two axes, so go through NCHW.
    nchw = x.permute(0, 3, 1, 2)
    pad_bottom = full_res[0] - nchw.shape[2]
    pad_right = full_res[1] - nchw.shape[3]
    padded = torch.nn.functional.pad(nchw, (0, pad_right, 0, pad_bottom), 'replicate')
    return padded.permute(0, 2, 3, 1).contiguous()
121 | 
def merge_materials(materials, texcoords, tfaces, mfaces):
    """Merge several materials into one uber-material with atlas textures.

    The 'kd', 'ks' and 'normal' textures of all materials are rescaled to a
    common resolution, laid out side by side horizontally, and padded up to
    a power-of-two size. Texture coordinates are re-created per (texcoord,
    material) pair so that each face addresses its material's atlas region.

    Args:
        materials: non-empty list of material dicts sharing the same 'bsdf'
            and either all with or all without a 'normal' map.
        texcoords: indexable collection of (u, v) pairs.
        tfaces: per-face triples of indices into `texcoords`; rewritten
            IN PLACE to index the new coordinate list.
        mfaces: per-face material index.

    Returns:
        (uber_material, new_tverts_data, tfaces): the merged material, the
        new list of [u, v] coordinates, and the re-indexed `tfaces`.
    """
    assert len(materials) > 0
    for mat in materials:
        assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
        assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"

    uber_material = {
        'name' : 'uber_material',
        'bsdf' : materials[0]['bsdf'],
    }

    textures = ['kd', 'ks', 'normal']

    # Find maximum texture resolution across all materials and textures
    max_res = None
    for mat in materials:
        for tex in textures:
            tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
            max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res

    # Compute size of compound texture and round up to nearest PoT.
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy, so the builtin is used directly.
    full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)

    # Normalize texture resolution across all materials & combine into a single large texture
    for tex in textures:
        if tex in materials[0]:
            tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2) # Lay out all textures horizontally, NHWC so dim2 is x
            tex_data = _upscale_replicate(tex_data, full_res)
            uber_material[tex] = texture.Texture2D(tex_data)

    # Compute scaling values for used / unused texture area
    s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]

    # Recompute texture coordinates to coincide with new composite texture
    new_tverts = {}
    new_tverts_data = []
    for fi in range(len(tfaces)):
        matIdx = mfaces[fi]
        for vi in range(3):
            ti = tfaces[fi][vi]
            if ti not in new_tverts:
                new_tverts[ti] = {}
            if matIdx not in new_tverts[ti]: # create new vertex
                # Offset the u coordinate by the material id and scale to the
                # atlas. Resolutions are indexed (height, width) while
                # texcoords are (u, v), so the s_coeff indices are swapped.
                new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]])
                new_tverts[ti][matIdx] = len(new_tverts_data) - 1
            tfaces[fi][vi] = new_tverts[ti][matIdx] # reindex vertex

    return uber_material, new_tverts_data, tfaces
170 | 


--------------------------------------------------------------------------------
/render/mlptexture.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import torch
 11 | import tinycudann as tcnn
 12 | import numpy as np
 13 | 
 14 | #######################################################################################################################################################
 15 | # Small MLP using PyTorch primitives, internal helper class
 16 | #######################################################################################################################################################
 17 | 
 18 | class _MLP(torch.nn.Module):
 19 |     def __init__(self, cfg, loss_scale=1.0):
 20 |         super(_MLP, self).__init__()
 21 |         self.loss_scale = loss_scale
 22 |         net = (torch.nn.Linear(cfg['n_input_dims'], cfg['n_neurons'], bias=False), torch.nn.ReLU())
 23 |         for i in range(cfg['n_hidden_layers']-1):
 24 |             net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_neurons'], bias=False), torch.nn.ReLU())
 25 |         net = net + (torch.nn.Linear(cfg['n_neurons'], cfg['n_output_dims'], bias=False),)
 26 |         self.net = torch.nn.Sequential(*net).cuda()
 27 |         
 28 |         self.net.apply(self._init_weights)
 29 |         
 30 |         if self.loss_scale != 1.0:
 31 |             self.net.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] * self.loss_scale, ))
 32 | 
 33 |     def forward(self, x):
 34 |         return self.net(x.to(torch.float32))
 35 | 
 36 |     @staticmethod
 37 |     def _init_weights(m):
 38 |         if type(m) == torch.nn.Linear:
 39 |             torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
 40 |             if hasattr(m.bias, 'data'):
 41 |                 m.bias.data.fill_(0.0)
 42 | 
 43 | #######################################################################################################################################################
 44 | # Outward visible MLP class
 45 | #######################################################################################################################################################
 46 | 
 47 | class MLPTexture3D(torch.nn.Module):
 48 |     def __init__(self, AABB, channels = 3, internal_dims = 32, hidden = 2, min_max = None, use_float16=False):
 49 |         super(MLPTexture3D, self).__init__()
 50 | 
 51 |         self.channels = channels
 52 |         self.internal_dims = internal_dims
 53 |         self.AABB = AABB
 54 |         self.min_max = min_max
 55 |         self.use_float16 = use_float16
 56 | 
 57 |         # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details
 58 |         desired_resolution = 4096
 59 |         base_grid_resolution = 16
 60 |         num_levels = 16
 61 |         per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1))
 62 | 
 63 |         enc_cfg =  {
 64 |             "otype": "HashGrid",
 65 |             "n_levels": num_levels,
 66 |             "n_features_per_level": 2,
 67 |             "log2_hashmap_size": 19,
 68 |             "base_resolution": base_grid_resolution,
 69 |             "per_level_scale" : per_level_scale
 70 | 	    }
 71 | 
 72 |         gradient_scaling = 128.0
 73 |         self.encoder = tcnn.Encoding(3, enc_cfg)
 74 |         self.encoder.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] / gradient_scaling, ))
 75 | 
 76 |         # Setup MLP
 77 |         mlp_cfg = {
 78 |             "n_input_dims" : self.encoder.n_output_dims,
 79 |             "n_output_dims" : self.channels,
 80 |             "n_hidden_layers" : hidden,
 81 |             "n_neurons" : self.internal_dims
 82 |         }
 83 |         self.net = _MLP(mlp_cfg, gradient_scaling)
 84 |         print("Encoder output: %d dims" % (self.encoder.n_output_dims))
 85 | 
 86 |     # Sample texture at a given location
 87 |     def sample(self, texc):
 88 |         _texc = (texc.view(-1, 3) - self.AABB[0][None, ...]) / (self.AABB[1][None, ...] - self.AABB[0][None, ...])
 89 |         _texc = torch.clamp(_texc, min=0, max=1)
 90 |         
 91 |         p_enc = self.encoder(_texc.contiguous())
 92 |         with torch.autocast('cuda', dtype=torch.float16, enabled=self.use_float16):
 93 |             out = self.net.forward(p_enc)
 94 | 
 95 |         # Sigmoid limit and scale to the allowed range
 96 |         out = torch.sigmoid(out) * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :]
 97 | 
 98 |         return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, c]
 99 | 
100 |     # In-place clamp with no derivative to make sure values are in valid range after training
101 |     def clamp_(self):
102 |         pass
103 | 
104 |     def cleanup(self):
105 |         tcnn.free_temporary_memory()
106 | 
107 | 


--------------------------------------------------------------------------------
/render/optixutils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | # and proprietary rights in and to this software, related documentation
 5 | # and any modifications thereto. Any use, reproduction, disclosure or
 6 | # distribution of this software and related documentation without an express
 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | from .ops import OptiXContext, optix_build_bvh, optix_env_shade, bilateral_denoiser
# Public API of the package: exactly the names imported from the sibling ``ops`` module.
__all__ = ["OptiXContext", "optix_build_bvh", "optix_env_shade", 'bilateral_denoiser']
11 | 


--------------------------------------------------------------------------------
/render/optixutils/c_src/common.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | //
 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | // and proprietary rights in and to this software, related documentation
 5 | // and any modifications thereto. Any use, reproduction, disclosure or
 6 | // distribution of this software and related documentation without an express
 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | #pragma once
10 | 
11 | // Helper functions to do broadcast guarded fetches
12 | #if defined(__CUDACC__)
13 |     template<class T, typename U, typename... Args>
14 |     static __device__ inline float3 fetch3(const T &tensor, U idx, Args... args) {
15 |     return tensor.size(0) == 1 ? fetch3(tensor[0], args...) : fetch3(tensor[idx], args...);
16 |     }
17 |     template<class T> static __device__ inline float3 fetch3(const T &tensor) {
18 |     return tensor.size(0) == 1 ? make_float3(tensor[0], tensor[0], tensor[0]) : make_float3(tensor[0], tensor[1], tensor[2]);
19 |     }
20 | 
21 |     template<class T, typename U, typename... Args>
22 |     static __device__ inline float2 fetch2(const T &tensor, U idx, Args... args) {
23 |     return tensor.size(0) == 1 ? fetch2(tensor[0], args...) : fetch2(tensor[idx], args...);
24 |     }
25 |     template<class T> static __device__ inline float2 fetch2(const T &tensor) {
26 |     return tensor.size(0) == 1 ? make_float2(tensor[0], tensor[0]) : make_float2(tensor[0], tensor[1]);
27 |     }
28 | 
29 |     #include "math_utils.h"
30 |     #include "bsdf.h"
31 | #endif
32 | 
33 | //------------------------------------------------------------------------------
34 | // CUDA error-checking macros
35 | //------------------------------------------------------------------------------
36 | 
// Evaluates a CUDA runtime call exactly once and throws std::runtime_error
// carrying the call text, the CUDA error string and the source location when
// the call does not return cudaSuccess. Because it throws, it must not be used
// where an exception cannot propagate (e.g. destructors).
#define CUDA_CHECK( call )                                                     \
    do                                                                         \
    {                                                                          \
        cudaError_t error = call;                                              \
        if( error != cudaSuccess )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "CUDA call (" << #call << " ) failed with error: '"          \
               << cudaGetErrorString( error )                                  \
               << "' (" __FILE__ << ":" << __LINE__ << ")\n";                  \
            throw std::runtime_error( ss.str() );                              \
        }                                                                      \
    } while( 0 )
49 | 
50 | 
// Evaluates an OptiX API call exactly once and throws std::runtime_error
// carrying the call text and the source location when the result is not
// OPTIX_SUCCESS. Because it throws, it must not be used where an exception
// cannot propagate (e.g. destructors).
#define OPTIX_CHECK( call )                                                    \
    do                                                                         \
    {                                                                          \
        OptixResult res = call;                                                \
        if( res != OPTIX_SUCCESS )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "Optix call '" << #call << "' failed: " __FILE__ ":"         \
               << __LINE__ << ")\n";                                           \
            throw std::runtime_error( ss.str() );                              \
        }                                                                      \
    } while( 0 )
62 | 
// Like OPTIX_CHECK, but for calls that write into a log buffer. Expects two
// variables named `log` (the buffer) and `sizeof_log` (its in/out size) to be
// in scope at the expansion site. `sizeof_log` is reset after every call; on
// failure the log text is appended to the message of the std::runtime_error
// that is thrown, with a marker when the returned size exceeds the buffer.
#define OPTIX_CHECK_LOG( call )                                                \
    do                                                                         \
    {                                                                          \
        OptixResult res = call;                                                \
        const size_t sizeof_log_returned = sizeof_log;                         \
        sizeof_log = sizeof( log ); /* reset sizeof_log for future calls */    \
        if( res != OPTIX_SUCCESS )                                             \
        {                                                                      \
            std::stringstream ss;                                              \
            ss << "Optix call '" << #call << "' failed: " __FILE__ ":"         \
               << __LINE__ << ")\nLog:\n" << log                               \
               << ( sizeof_log_returned > sizeof( log ) ? "<TRUNCATED>" : "" ) \
               << "\n";                                                        \
            throw std::runtime_error( ss.str() );                              \
        }                                                                      \
    } while( 0 )
78 | 
// Evaluates an NVRTC call exactly once and throws std::runtime_error with the
// file name and the NVRTC error string when the result is not NVRTC_SUCCESS.
#define NVRTC_CHECK_ERROR( func )                                                                                           \
    do                                                                                                                      \
    {                                                                                                                       \
        nvrtcResult code = func;                                                                                            \
        if( code != NVRTC_SUCCESS )                                                                                         \
            throw std::runtime_error( "ERROR: " __FILE__ "(): " + std::string( nvrtcGetErrorString( code ) ) );             \
    } while( 0 )
86 | 


--------------------------------------------------------------------------------
/render/optixutils/c_src/denoising.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  2 | //
  3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
  4 | // and proprietary rights in and to this software, related documentation
  5 | // and any modifications thereto. Any use, reproduction, disclosure or
  6 | // distribution of this software and related documentation without an express
  7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
  8 | 
  9 | #include "common.h"
 10 | #include "denoising.h"
 11 | 
// Lower bound used when clamping the normal dot product and the denominator of
// the depth weight in the kernels below.
#define FLT_EPS 0.0001f
 13 | 
 14 | __global__ void bilateral_denoiser_fwd_kernel(BilateralDenoiserParams params)
 15 | {
 16 |     uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);
 17 | 
 18 |     if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
 19 |         return;
 20 | 
 21 |     // Fetch central tap
 22 |     float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
 23 |     float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);
 24 | 
 25 |     float variance = params.sigma * params.sigma;
 26 |     int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;
 27 | 
 28 |     float accum_w = 0.0f;
 29 |     float3 accum_col = make_float3(0.0f);
 30 |     for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
 31 |     {
 32 |         for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
 33 |         {
 34 |             // Compute tap coordinates, used for input activations and bilateral guides
 35 |             int32_t y = idx.y + fy;
 36 |             int32_t x = idx.x + fx;
 37 | 
 38 |             if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
 39 |                 continue;
 40 | 
 41 |             // Fetch current tap
 42 |             float3 t_col = fetch3(params.col, idx.z, y, x);
 43 |             float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
 44 |             float2 t_zdz = fetch2(params.zdz, idx.z, y, x);
 45 | 
 46 |             /////////////////////////////////////////////////////////
 47 |             // Compute bilateral weight
 48 |             /////////////////////////////////////////////////////////
 49 | 
 50 |             // Distance
 51 |             float dist_sqr = fx * fx + fy * fy;
 52 |             float dist = sqrtf(dist_sqr);
 53 |             float w_xy = expf(-dist_sqr / (2.0f * variance));
 54 | 
 55 |             // Normal
 56 |             float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);
 57 | 
 58 |             // Depth
 59 |             float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(c_zdz.y * dist, FLT_EPS)));
 60 | 
 61 |             float w = w_xy * w_normal * w_depth;
 62 | 
 63 |             accum_col = accum_col + t_col * w;
 64 |             accum_w += w;
 65 |         }
 66 |     }
 67 | 
 68 |     params.out[idx.z][idx.y][idx.x][0] = accum_col.x;
 69 |     params.out[idx.z][idx.y][idx.x][1] = accum_col.y;
 70 |     params.out[idx.z][idx.y][idx.x][2] = accum_col.z;
 71 |     params.out[idx.z][idx.y][idx.x][3] = max(accum_w, 0.0001f);
 72 | }
 73 | 
 74 | __global__ void bilateral_denoiser_bwd_kernel(BilateralDenoiserParams params)
 75 | {
 76 |     uint3 idx = make_uint3(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y, blockIdx.z * blockDim.z + threadIdx.z);
 77 | 
 78 |     if (idx.z >= params.col.size(0) || idx.y >= params.col.size(1) || idx.x >= params.col.size(2))
 79 |         return;
 80 | 
 81 |     // Fetch central tap
 82 |     float3 c_nrm = fetch3(params.nrm, idx.z, idx.y, idx.x);
 83 |     float2 c_zdz = fetch2(params.zdz, idx.z, idx.y, idx.x);
 84 | 
 85 |     float variance = params.sigma * params.sigma;
 86 |     int filter_rad = 2 * ceil(params.sigma * 2.5) + 1;
 87 | 
 88 |     float3 accum_grad = make_float3(0.0f);
 89 |     for (int32_t fy = -filter_rad; fy <= filter_rad; ++fy)
 90 |     {
 91 |         for (int32_t fx = -filter_rad; fx <= filter_rad; ++fx)
 92 |         {
 93 |             // Compute tap coordinates, used for input activations and bilateral guides
 94 |             int32_t y = idx.y + fy;
 95 |             int32_t x = idx.x + fx;
 96 | 
 97 |             if (y < 0 || x < 0 || y >= params.col.size(1) || x >= params.col.size(2))
 98 |                 continue;
 99 | 
100 |             // Fetch current tap
101 |             float3 t_col = fetch3(params.col, idx.z, y, x);
102 |             float3 t_nrm = fetch3(params.nrm, idx.z, y, x);
103 |             float2 t_zdz = fetch2(params.zdz, idx.z, y, x);
104 | 
105 |             /////////////////////////////////////////////////////////
106 |             // Compute bilateral weight
107 |             /////////////////////////////////////////////////////////
108 | 
109 |             // Distance, transposing fx & fy doesn't affect distance
110 |             float dist_sqr = fx * fx + fy * fy;
111 |             float dist = sqrtf(dist_sqr);
112 |             float w_xy = expf(-dist_sqr / (2.0f * variance));
113 | 
114 |             // Normal, transpose c_ and t_ (it's symmetric so doesn't matter)
115 |             float w_normal = powf(min(max(dot(t_nrm, c_nrm), FLT_EPS), 1.0f), 128.0f);
116 | 
117 |             // Depth, transpose c_ and t_ (matters for the denominator)
118 |             float w_depth = expf(-(abs(t_zdz.x - c_zdz.x) / max(t_zdz.y * dist, FLT_EPS)));
119 | 
120 |             float w = w_xy * w_normal * w_depth;
121 | 
122 |             float3 t_col_grad = w * fetch3(params.out_grad, idx.z, y, x);
123 |             accum_grad += t_col_grad;
124 |         }
125 |     }
126 | 
127 |     params.col_grad[idx.z][idx.y][idx.x][0] = accum_grad.x;
128 |     params.col_grad[idx.z][idx.y][idx.x][1] = accum_grad.y;
129 |     params.col_grad[idx.z][idx.y][idx.x][2] = accum_grad.z;
130 | }
131 | 


--------------------------------------------------------------------------------
/render/optixutils/c_src/denoising.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | //
 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | // and proprietary rights in and to this software, related documentation
 5 | // and any modifications thereto. Any use, reproduction, disclosure or
 6 | // distribution of this software and related documentation without an express
 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | #pragma once
10 | #include "accessor.h"
11 | 
// Arguments handed by value to the bilateral denoiser kernels. All tensors are
// rank-4 float accessors that the kernels index as [z][y][x][channel].
// NOTE(review): channel counts below are inferred from the kernels' fetch2/fetch3
// calls and writes — confirm against the host-side launcher.
struct BilateralDenoiserParams
{
    PackedTensorAccessor32<float, 4> col;       // input color, read via fetch3; its extents bound both kernels
    PackedTensorAccessor32<float, 4> col_grad;  // gradient w.r.t. col, channels 0..2 written by the backward kernel
    PackedTensorAccessor32<float, 4> nrm;       // guide read via fetch3, used for the normal weight
    PackedTensorAccessor32<float, 4> zdz;       // guide read via fetch2, used for the depth weight
    PackedTensorAccessor32<float, 4> out;       // forward output: weighted color sum in 0..2, weight sum in 3
    PackedTensorAccessor32<float, 4> out_grad;  // incoming gradient, read via fetch3 by the backward kernel
    float sigma;                                // sets the spatial Gaussian variance and the window radius
};
22 | 


--------------------------------------------------------------------------------
/render/optixutils/c_src/envsampling/params.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | //
 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | // and proprietary rights in and to this software, related documentation
 5 | // and any modifications thereto. Any use, reproduction, disclosure or
 6 | // distribution of this software and related documentation without an express
 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | #include "../accessor.h"
10 | 
// Parameter block for the environment-light sampling programs: tensor
// accessors for the inputs, their gradients and the outputs, plus scalar
// launch settings.
// NOTE(review): unlike the neighbouring headers this file has no `#pragma once`
// or include guard, so including it twice in one translation unit would
// redefine the struct — confirm whether that is intentional.
struct EnvSamplingParams
{
    // Ray data
    PackedTensorAccessor32<float, 4>    ro;             // ray origin
    
    // GBuffer
    PackedTensorAccessor32<float, 3>    mask;
    PackedTensorAccessor32<float, 4>    gb_pos;
    PackedTensorAccessor32<float, 4>    gb_pos_grad;
    PackedTensorAccessor32<float, 4>    gb_normal;
    PackedTensorAccessor32<float, 4>    gb_normal_grad;
    PackedTensorAccessor32<float, 4>    gb_view_pos;
    PackedTensorAccessor32<float, 4>    gb_kd;
    PackedTensorAccessor32<float, 4>    gb_kd_grad;
    PackedTensorAccessor32<float, 4>    gb_ks;
    PackedTensorAccessor32<float, 4>    gb_ks_grad;
    
    // Light
    PackedTensorAccessor32<float, 3>    light;
    PackedTensorAccessor32<float, 3>    light_grad;
    PackedTensorAccessor32<float, 2>    pdf;        // light pdf
    PackedTensorAccessor32<float, 1>    rows;       // light sampling cdf
    PackedTensorAccessor32<float, 2>    cols;       // light sampling cdf

    // Output
    PackedTensorAccessor32<float, 4>    diff;
    PackedTensorAccessor32<float, 4>    diff_grad;
    PackedTensorAccessor32<float, 4>    spec;
    PackedTensorAccessor32<float, 4>    spec_grad;

    // Table with random permutations for stratified sampling
    PackedTensorAccessor32<int, 2>      perms;

    // Scalar launch settings
    // NOTE(review): the exact meaning of these fields is defined by the device
    // programs, which are not part of this header — verify there.
    OptixTraversableHandle              handle;
    unsigned int                        BSDF;
    unsigned int                        n_samples_x;
    unsigned int                        rnd_seed;
    unsigned int                        backward;
    float                               shadow_scale;
};


--------------------------------------------------------------------------------
/render/optixutils/c_src/optix_wrapper.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
 2 | //
 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
 4 | // and proprietary rights in and to this software, related documentation
 5 | // and any modifications thereto. Any use, reproduction, disclosure or
 6 | // distribution of this software and related documentation without an express
 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
 8 | 
 9 | #pragma once
10 | 
11 | #include <optix.h>
12 | #include <string>
13 | 
14 | //------------------------------------------------------------------------
15 | // Python OptiX state wrapper.
16 | 
// Plain aggregate of the OptiX objects kept alive between calls.
// NOTE(review): "gas" presumably stands for geometry acceleration structure —
// confirm against the code that builds the BVH.
struct OptiXState
{
    OptixDeviceContext context;
    OptixTraversableHandle gas_handle;
    CUdeviceptr            d_gas_output_buffer;

    // Differentiable env sampling
    OptixPipeline pipelineEnvSampling;
    OptixShaderBindingTable sbtEnvSampling;
    OptixModule moduleEnvSampling;
};
28 | 
29 | 
// Holder for an OptiXState, constructed from two path strings.
// NOTE(review): the class declares a destructor and a raw `pState` pointer but
// no copy/move operations; if the destructor frees `pState`, copying a wrapper
// would free it twice — verify in the implementation file.
class OptiXStateWrapper
{
public:
    OptiXStateWrapper     (const std::string &path, const std::string &cuda_path);
    ~OptiXStateWrapper    (void);
    
    OptiXState*           pState;   // state object exposed to callers
};
38 | 
39 | 


--------------------------------------------------------------------------------
/render/optixutils/include/optix.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
 4 |  *
 5 |  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
 6 |  * rights in and to this software, related documentation and any modifications thereto.
 7 |  * Any use, reproduction, disclosure or distribution of this software and related
 8 |  * documentation without an express license agreement from NVIDIA Corporation is strictly
 9 |  * prohibited.
10 |  *
11 |  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 |  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 |  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 |  * PARTICULAR PURPOSE.  IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 |  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 |  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 |  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 |  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 |  * SUCH DAMAGES
20 |  */
21 | 
22 | /// @file
23 | /// @author NVIDIA Corporation
24 | /// @brief  OptiX public API header
25 | ///
26 | /// Includes the host api if compiling host code, includes the cuda api if compiling device code.
27 | /// For the math library routines include optix_math.h
28 | 
29 | #ifndef __optix_optix_h__
30 | #define __optix_optix_h__
31 | 
32 | /// The OptiX version.
33 | ///
34 | /// - major =  OPTIX_VERSION/10000
35 | /// - minor = (OPTIX_VERSION%10000)/100
36 | /// - micro =  OPTIX_VERSION%100
37 | #define OPTIX_VERSION 70300
38 | 
39 | 
40 | #ifdef __CUDACC__
41 | #include "optix_device.h"
42 | #else
43 | #include "optix_host.h"
44 | #endif
45 | 
46 | 
47 | #endif  // __optix_optix_h__
48 | 


--------------------------------------------------------------------------------
/render/optixutils/include/optix_device.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
 4 |  *
 5 |  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
 6 |  * rights in and to this software, related documentation and any modifications thereto.
 7 |  * Any use, reproduction, disclosure or distribution of this software and related
 8 |  * documentation without an express license agreement from NVIDIA Corporation is strictly
 9 |  * prohibited.
10 |  *
11 |  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 |  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 |  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 |  * PARTICULAR PURPOSE.  IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 |  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 |  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 |  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 |  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 |  * SUCH DAMAGES
20 |  */
21 | 
22 |  /**
23 |  * @file   optix_device.h
24 |  * @author NVIDIA Corporation
25 |  * @brief  OptiX public API
26 |  *
27 |  * OptiX public API Reference - Host/Device side
28 |  */
29 | 
30 | /******************************************************************************\
31 |  * optix_cuda.h
32 |  *
33 |  * This file provides the nvcc interface for generating PTX that the OptiX is
34 |  * capable of parsing and weaving into the final kernel.  This is included by
35 |  * optix.h automatically if compiling device code.  It can be included explicitly
36 |  * in host code if desired.
37 |  *
38 | \******************************************************************************/
39 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
40 | #  define __OPTIX_INCLUDE_INTERNAL_HEADERS__
41 | #  define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__
42 | #endif
43 | #include "optix_7_device.h"
44 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__ )
45 | #  undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
46 | #  undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_DEVICE_H__
47 | #endif
48 | 


--------------------------------------------------------------------------------
/render/optixutils/include/optix_function_table_definition.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
 3 |  *
 4 |  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
 5 |  * rights in and to this software, related documentation and any modifications thereto.
 6 |  * Any use, reproduction, disclosure or distribution of this software and related
 7 |  * documentation without an express license agreement from NVIDIA Corporation is strictly
 8 |  * prohibited.
 9 |  *
10 |  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
11 |  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
12 |  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
13 |  * PARTICULAR PURPOSE.  IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
14 |  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
15 |  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
16 |  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
17 |  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
18 |  * SUCH DAMAGES
19 |  */
20 | 
21 | /// @file
22 | /// @author NVIDIA Corporation
23 | /// @brief  OptiX public API header
24 | 
25 | #ifndef __optix_optix_function_table_definition_h__
26 | #define __optix_optix_function_table_definition_h__
27 | 
28 | #include "optix_function_table.h"
29 | 
30 | #ifdef __cplusplus
31 | extern "C" {
32 | #endif
33 | 
34 | /** \addtogroup optix_function_table
35 | @{
36 | */
37 | 
38 | /// If the stubs in optix_stubs.h are used, then the function table needs to be defined in exactly
39 | /// one translation unit. This can be achieved by including this header file in that translation
40 | /// unit.
41 | OptixFunctionTable g_optixFunctionTable;
42 | 
43 | /*@}*/  // end group optix_function_table
44 | 
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 | 
49 | #endif  // __optix_optix_function_table_definition_h__
50 | 


--------------------------------------------------------------------------------
/render/optixutils/include/optix_host.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
 4 |  *
 5 |  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
 6 |  * rights in and to this software, related documentation and any modifications thereto.
 7 |  * Any use, reproduction, disclosure or distribution of this software and related
 8 |  * documentation without an express license agreement from NVIDIA Corporation is strictly
 9 |  * prohibited.
10 |  *
11 |  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
12 |  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
13 |  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 |  * PARTICULAR PURPOSE.  IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
15 |  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
16 |  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
17 |  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 |  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
19 |  * SUCH DAMAGES
20 |  */
21 | 
22 | /**
23 |  * @file   optix_host.h
24 |  * @author NVIDIA Corporation
25 |  * @brief  OptiX public API
26 |  *
27 |  * OptiX public API Reference - Host side
28 |  */
29 | 
30 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
31 | #  define __OPTIX_INCLUDE_INTERNAL_HEADERS__
32 | #  define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__
33 | #endif
34 | #include "optix_7_host.h"
35 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__ )
36 | #  undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
37 | #  undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_HOST_H__
38 | #endif
39 | 


--------------------------------------------------------------------------------
/render/optixutils/include/optix_types.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
 3 |  *
 4 |  * NVIDIA Corporation and its licensors retain all intellectual property and proprietary
 5 |  * rights in and to this software, related documentation and any modifications thereto.
 6 |  * Any use, reproduction, disclosure or distribution of this software and related
 7 |  * documentation without an express license agreement from NVIDIA Corporation is strictly
 8 |  * prohibited.
 9 |  *
10 |  * TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED *AS IS*
11 |  * AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS OR IMPLIED,
12 |  * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
13 |  * PARTICULAR PURPOSE.  IN NO EVENT SHALL NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY
14 |  * SPECIAL, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT
15 |  * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF
16 |  * BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
17 |  * INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF
18 |  * SUCH DAMAGES
19 |  */
20 | 
21 | /**
22 |  * @file   optix_types.h
23 |  * @author NVIDIA Corporation
24 |  * @brief  OptiX public API header
25 |  *
26 |  */
27 | 
28 | #ifndef __optix_optix_types_h__
29 | #define __optix_optix_types_h__
30 | 
31 | // clang-format off
32 | #if !defined(__OPTIX_INCLUDE_INTERNAL_HEADERS__)
33 | #  define __OPTIX_INCLUDE_INTERNAL_HEADERS__
34 | #  define __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__
35 | #endif
36 | #include "optix_7_types.h"
37 | #if defined( __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__ )
38 | #  undef __OPTIX_INCLUDE_INTERNAL_HEADERS__
39 | #  undef __UNDEF_OPTIX_INCLUDE_INTERNAL_HEADERS_OPTIX_TYPES_H__
40 | #endif
41 | // clang-format on
42 | 
43 | #endif // #ifndef __optix_optix_types_h__
44 | 


--------------------------------------------------------------------------------
/render/optixutils/ops.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
  4 | # and proprietary rights in and to this software, related documentation
  5 | # and any modifications thereto. Any use, reproduction, disclosure or
  6 | # distribution of this software and related documentation without an express
  7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
  8 | 
  9 | import numpy as np
 10 | import os
 11 | import sys
 12 | import torch
 13 | import torch.utils.cpp_extension
 14 | 
 15 | #----------------------------------------------------------------------------
 16 | # C++/Cuda plugin compiler/loader.
 17 | 
# Handle to the compiled 'optixutils_plugin' extension module. It is built and loaded
# below as a side effect of importing this file.
_plugin = None
if _plugin is None:

    # Make sure we can find the necessary compiler and library binaries.
    # NOTE(review): optix_include_dir (here) and ldflags (below) are only assigned for
    # os.name 'nt' and 'posix'; any other value ends in a NameError further down.
    if os.name == 'nt':
        optix_include_dir = os.path.dirname(__file__) + r"\include"

        def find_cl_path():
            # Probe the default Visual Studio install roots one edition at a time and
            # return the greatest match in string sort order. Falls off the end
            # (returning None) when no edition matches.
            import glob
            for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']:
                vs_editions = glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition) \
                    + glob.glob(r"C:\Program Files\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition)
                paths = sorted(vs_editions, reverse=True)
                if paths:
                    return paths[0]

        # If cl.exe is not on path, try to find it.
        if os.system("where cl.exe >nul 2>nul") != 0:
            cl_path = find_cl_path()
            if cl_path is None:
                raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
            # Appended for the lifetime of this process only.
            os.environ['PATH'] += ';' + cl_path

    elif os.name == 'posix':
        optix_include_dir = os.path.dirname(__file__) + r"/include"

    include_paths = [optix_include_dir]

    # Compiler options (passed to both the host compiler and nvcc below).
    opts = ['-DNVDR_TORCH']

    # Linker options.
    if os.name == 'posix':
        ldflags = ['-lcuda', '-lnvrtc']
    elif os.name == 'nt':
        ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib']

    # List of sources, relative to this file's directory.
    source_files = [
        'c_src/denoising.cu',
        'c_src/optix_wrapper.cpp',
        'c_src/torch_bindings.cpp'
    ]

    # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine.
    # NOTE: this overwrites any value the user exported, for the whole process.
    os.environ['TORCH_CUDA_ARCH_LIST'] = ''

    # Compile and load. Build artifacts go to a 'build' directory next to this file.
    build_dir = os.path.join(os. path. dirname(__file__), 'build')
    os.makedirs(build_dir, exist_ok=True)
    source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files]
    torch.utils.cpp_extension.load(name='optixutils_plugin', sources=source_paths, extra_cflags=opts,
         build_directory=build_dir,
         extra_cuda_cflags=opts, extra_ldflags=ldflags, extra_include_paths=include_paths, with_cuda=True, verbose=True)

    # Import and cache the compiled module.
    import optixutils_plugin
    _plugin = optixutils_plugin
 76 | 
 77 | #----------------------------------------------------------------------------
 78 | # OptiX autograd func
 79 | #----------------------------------------------------------------------------
 80 | 
 81 | class _optix_env_shade_func(torch.autograd.Function):
 82 |     _random_perm = {}
 83 | 
 84 |     @staticmethod
 85 |     def forward(ctx, optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF, n_samples_x, rnd_seed, shadow_scale):
 86 |         _rnd_seed = np.random.randint(2**31) if rnd_seed is None else rnd_seed
 87 |         if n_samples_x not in _optix_env_shade_func._random_perm:
 88 |             # Generate (32k) tables with random permutations to decorrelate the BSDF and light stratified samples
 89 |             _optix_env_shade_func._random_perm[n_samples_x] = torch.argsort(torch.rand(32768, n_samples_x * n_samples_x, device="cuda"), dim=-1).int()
 90 | 
 91 |         diff, spec = _plugin.env_shade_fwd(optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[n_samples_x], BSDF, n_samples_x, _rnd_seed, shadow_scale)
 92 |         ctx.save_for_backward(mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols)
 93 |         ctx.optix_ctx = optix_ctx
 94 |         ctx.BSDF = BSDF
 95 |         ctx.n_samples_x = n_samples_x
 96 |         ctx.rnd_seed = rnd_seed
 97 |         ctx.shadow_scale = shadow_scale
 98 |         return diff, spec
 99 |     
100 |     @staticmethod
101 |     def backward(ctx, diff_grad, spec_grad):
102 |         optix_ctx = ctx.optix_ctx
103 |         _rnd_seed = np.random.randint(2**31) if ctx.rnd_seed is None else ctx.rnd_seed
104 |         mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols = ctx.saved_variables
105 |         gb_pos_grad, gb_normal_grad, gb_kd_grad, gb_ks_grad, light_grad = _plugin.env_shade_bwd(
106 |             optix_ctx.cpp_wrapper, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, _optix_env_shade_func._random_perm[ctx.n_samples_x], 
107 |             ctx.BSDF, ctx.n_samples_x, _rnd_seed, ctx.shadow_scale, diff_grad, spec_grad)
108 |         return None, None, None, gb_pos_grad, gb_normal_grad, None, gb_kd_grad, gb_ks_grad, light_grad, None, None, None, None, None, None, None
109 | 
class _bilateral_denoiser_func(torch.autograd.Function):
    """Autograd bridge to the plugin's ``bilateral_denoiser_fwd`` / ``_bwd`` entry points.

    Only ``col`` receives a gradient; ``nrm``, ``zdz`` and ``sigma`` get ``None``.
    """

    @staticmethod
    def forward(ctx, col, nrm, zdz, sigma):
        ctx.save_for_backward(col, nrm, zdz)
        ctx.sigma = sigma
        out = _plugin.bilateral_denoiser_fwd(col, nrm, zdz, sigma)
        return out

    @staticmethod
    def backward(ctx, out_grad):
        # saved_tensors is the supported accessor; saved_variables is its deprecated alias.
        col, nrm, zdz = ctx.saved_tensors
        col_grad = _plugin.bilateral_denoiser_bwd(col, nrm, zdz, ctx.sigma, out_grad)
        return col_grad, None, None, None
123 | 
124 | #----------------------------------------------------------------------------
125 | # OptiX ray tracing utils
126 | #----------------------------------------------------------------------------
127 | 
class OptiXContext:
    """Owns the plugin-side OptiX state object, exposed as ``self.cpp_wrapper``."""

    def __init__(self):
        cuda_home = torch.utils.cpp_extension.CUDA_HOME
        print("Cuda path", cuda_home)
        # The wrapper is handed this module's directory and the CUDA install root.
        module_dir = os.path.dirname(__file__)
        self.cpp_wrapper = _plugin.OptiXStateWrapper(module_dir, cuda_home)
132 | 
def optix_build_bvh(optix_ctx, verts, tris, rebuild):
    """Hand a triangle mesh to the plugin's ``optix_build_bvh`` entry point.

    ``verts`` and ``tris`` are viewed as ``(-1, 3)`` before being passed on.
    Empty meshes are intentionally not rejected here, since training may still
    be driven by msdf supervision.
    """
    # TODO: the disabled emptiness checks on tris/verts should be cleaned up later.
    wrapper = optix_ctx.cpp_wrapper
    flat_verts = verts.view(-1, 3)
    flat_tris = tris.view(-1, 3)
    _plugin.optix_build_bvh(wrapper, flat_verts, flat_tris, rebuild)
140 | 
def optix_env_shade(optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks, light, pdf, rows, cols, BSDF='pbr', n_samples_x=8, rnd_seed=None, shadow_scale=1.0):
    """Shade through ``_optix_env_shade_func`` after mapping the BSDF name to an index.

    ``BSDF`` must be one of 'pbr', 'diffuse' or 'white'; any other value raises
    ``ValueError`` from ``list.index``.
    """
    # Ordering important, must match the order of the fwd/bwdPbrBSDF kernel.
    bsdf_names = ['pbr', 'diffuse', 'white']
    bsdf_index = bsdf_names.index(BSDF)
    return _optix_env_shade_func.apply(
        optix_ctx, mask, ro, gb_pos, gb_normal, gb_view_pos, gb_kd, gb_ks,
        light, pdf, rows, cols, bsdf_index, n_samples_x, rnd_seed, shadow_scale)
144 | 
def bilateral_denoiser(col, nrm, zdz, sigma):
    """Run the plugin bilateral filter and normalise its output.

    The first three output channels are divided by the fourth (presumably the
    accumulated filter weight; confirm against the kernel).
    """
    filtered = _bilateral_denoiser_func.apply(col, nrm, zdz, sigma)
    rgb_part = filtered[..., 0:3]
    weight_part = filtered[..., 3:4]
    return rgb_part / weight_part
148 | 


--------------------------------------------------------------------------------
/render/optixutils/tests/filter_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  2 | #
  3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
  4 | # and proprietary rights in and to this software, related documentation
  5 | # and any modifications thereto. Any use, reproduction, disclosure or
  6 | # distribution of this software and related documentation without an express
  7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
  8 | 
  9 | from pickletools import read_float8
 10 | import torch
 11 | 
 12 | import os
 13 | import sys
 14 | import math
 15 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
 16 | import optixutils as ou
 17 | import numpy as np
 18 | 
 19 | RES = 1024
 20 | DTYPE = torch.float32
 21 | 
 22 | def length(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
 23 | 	return torch.sqrt(torch.clamp(dot(x,x), min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN
 24 | 
 25 | def safe_normalize(x: torch.Tensor, eps: float =1e-20) -> torch.Tensor:
 26 | 	return x / length(x, eps)
 27 | 
 28 | def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 29 | 	return torch.sum(x*y, -1, keepdim=True)
 30 | 
 31 | class BilateralDenoiser(torch.nn.Module):
 32 | 	def __init__(self, sigma=1.0):
 33 | 		super(BilateralDenoiser, self).__init__()
 34 | 		self.set_sigma(sigma)
 35 | 
 36 | 	def set_sigma(self, sigma):
 37 | 		self.sigma = max(sigma, 0.0001)
 38 | 		self.variance = self.sigma**2.
 39 | 		self.N = 2 * math.ceil(self.sigma * 2.5) + 1
 40 | 
 41 | 	def forward(self, input):
 42 | 		eps    = 0.0001
 43 | 		col    = input[..., 0:3]
 44 | 		nrm    = input[..., 3:6]
 45 | 		kd     = input[..., 6:9]
 46 | 		zdz    = input[..., 9:11]
 47 | 
 48 | 		accum_col = torch.zeros_like(col)
 49 | 		accum_w = torch.zeros_like(col[..., 0:1])
 50 | 		for y in range(-self.N, self.N+1):
 51 | 			for x in range(-self.N, self.N+1):
 52 | 
 53 | 				ty, tx = torch.meshgrid(torch.arange(0, input.shape[1], dtype=torch.float32, device="cuda"), torch.arange(0, input.shape[2], dtype=torch.float32, device="cuda"))
 54 | 				tx = tx[None, ..., None] + x
 55 | 				ty = ty[None, ..., None] + y
 56 | 
 57 | 				dist_sqr = (x**2 + y**2)
 58 | 				dist = np.sqrt(dist_sqr)
 59 | 				w_xy = np.exp(-dist_sqr / (2 * self.variance))
 60 | 
 61 | 				with torch.no_grad():
 62 | 					nrm_tap = torch.roll(nrm, (-y, -x), (1, 2))
 63 | 					w_normal = torch.pow(torch.clamp(dot(nrm_tap, nrm), min=eps, max=1.0), 128.0)           # From SVGF
 64 | 
 65 | 					zdz_tap = torch.roll(zdz, (-y, -x), (1, 2))
 66 | 					w_depth = torch.exp(-(torch.abs(zdz_tap[..., 0:1] - zdz[..., 0:1]) / torch.clamp(zdz[..., 1:2] * dist, min=eps)) ) # From SVGF	
 67 | 
 68 | 					w = w_xy * w_normal * w_depth
 69 | 					w = torch.where((tx >= 0) & (tx < input.shape[2]) & (ty >= 0) & (ty < input.shape[1]), w, torch.zeros_like(w))
 70 | 
 71 | 				col_tap = torch.roll(col, (-y, -x), (1, 2))
 72 | 				accum_col += col_tap * w
 73 | 				accum_w += w
 74 | 		return accum_col / torch.clamp(accum_w, min=eps)
 75 | 
 76 | def relative_loss(name, ref, cuda):
 77 | 	ref = ref.float()
 78 | 	cuda = cuda.float()
 79 | 	denom = torch.where(ref > 1e-7, ref, torch.ones_like(ref))
 80 | 	relative = torch.abs(ref - cuda) / denom
 81 | 	print(name, torch.max(relative).item())
 82 | 
 83 | 
def test_filter():
	# Compares the Python reference filter against the plugin filter on identical random
	# input, timing forward+backward of each and printing the max relative differences.
	# Requires a CUDA device (tensors and timing events are created on 'cuda').
	img_cuda = torch.rand(1, RES, RES, 11, dtype=DTYPE, device='cuda')
	# Channels 3:6 are read as normals by the reference filter, so make them unit length.
	img_cuda[..., 3:6] = safe_normalize(img_cuda[..., 3:6])
	# Two independent leaf copies so each path accumulates its own .grad.
	img_ref = img_cuda.clone().detach().requires_grad_(True)
	img_cuda = img_cuda.clone().detach().requires_grad_(True)
	target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
	target_ref = target_cuda.clone().detach().requires_grad_(True)

	SIGMA = 2.0

	start = torch.cuda.Event(enable_timing=True)
	end = torch.cuda.Event(enable_timing=True)

	# Reference path: pure PyTorch filter, MSE against the target, backward.
	start.record()
	denoiser = BilateralDenoiser(sigma=SIGMA)
	denoised_ref = denoiser.forward(img_ref)
	ref_loss = torch.nn.MSELoss()(denoised_ref, target_ref)
	ref_loss.backward()
	end.record()
	torch.cuda.synchronize()
	print("Python:", start.elapsed_time(end))

	# Plugin path, fed the same channel slices the reference reads.
	# NOTE(review): the optixutils ops module exposes `bilateral_denoiser`; confirm that
	# `ou.svgf` (with this 5-argument signature) still exists before relying on this test.
	start.record()
	denoised_cuda = ou.svgf(img_cuda[..., 0:3], img_cuda[..., 3:6], img_cuda[..., 9:11], img_cuda[..., 6:9], SIGMA)
	cuda_loss = torch.nn.MSELoss()(denoised_cuda, target_cuda)
	cuda_loss.backward()
	end.record()
	torch.cuda.synchronize()
	print("CUDA:", start.elapsed_time(end))

	print("-------------------------------------------------------------")
	print("    Filter loss:")
	print("-------------------------------------------------------------")

	# Only the colour channels (0:3) of the outputs and input gradients are compared.
	relative_loss("denoised:", denoised_ref[..., 0:3], denoised_cuda[..., 0:3])
	relative_loss("grad:", img_ref.grad[..., 0:3], img_cuda.grad[..., 0:3])

# Runs immediately when the file is executed or imported.
test_filter()


--------------------------------------------------------------------------------
/render/regularizer.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import torch
 11 | import nvdiffrast.torch as dr
 12 | 
 13 | from render import util
 14 | from . import mesh
 15 | 
 16 | def luma(x):
 17 |     return ((x[..., 0:1] + x[..., 1:2] + x[..., 2:3]) / 3).repeat(1, 1, 1, 3)
 18 | def value(x):
 19 |     return torch.max(x[..., 0:3], dim=-1, keepdim=True)[0].repeat(1, 1, 1, 3)
 20 | 
 21 | def chroma_loss(kd, color_ref, lambda_chroma):
 22 |     eps = 0.001
 23 |     ref_chroma = color_ref[..., 0:3] / torch.clip(value(color_ref), min=eps)
 24 |     opt_chroma = kd[..., 0:3] / torch.clip(value(kd), min=eps)
 25 |     return torch.mean(torch.abs((opt_chroma - ref_chroma) * color_ref[..., 3:])) * lambda_chroma
 26 | 
 27 | # Diffuse luma regularizer + specular 
 28 | def shading_loss(diffuse_light, specular_light, color_ref, lambda_diffuse, lambda_specular):
 29 |     diffuse_luma  = luma(diffuse_light)
 30 |     specular_luma = luma(specular_light)
 31 |     ref_luma      = value(color_ref)
 32 |     
 33 |     eps = 0.001
 34 |     img    = util.rgb_to_srgb(torch.log(torch.clamp((diffuse_luma + specular_luma) * color_ref[..., 3:], min=0, max=65535) + 1))
 35 |     target = util.rgb_to_srgb(torch.log(torch.clamp(ref_luma * color_ref[..., 3:], min=0, max=65535) + 1))
 36 |     # error  = torch.abs(img - target) * diffuse_luma / torch.clamp(diffuse_luma + specular_luma, min=eps) ### encourage specular component to take control
 37 |     error  = torch.abs(img - target) ### the original version in the paper
 38 |     loss   = torch.mean(error) * lambda_diffuse
 39 |     loss  += torch.mean(specular_luma) / torch.clamp(torch.mean(diffuse_luma), min=eps) * lambda_specular
 40 |     return loss
 41 | 
 42 | ######################################################################################
 43 | # Material smoothness loss
 44 | ######################################################################################
 45 | 
 46 | def material_smoothness_grad(kd_grad, ks_grad, nrm_grad, lambda_kd=0.25, lambda_ks=0.1, lambda_nrm=0.0):
 47 |     kd_luma_grad = (kd_grad[..., 0] + kd_grad[..., 1] + kd_grad[..., 2]) / 3
 48 |     loss  = torch.mean(kd_luma_grad * kd_grad[..., -1]) * lambda_kd
 49 |     loss += torch.mean(ks_grad[..., :-1] * ks_grad[..., -1:]) * lambda_ks
 50 |     loss += torch.mean(nrm_grad[..., :-1] * nrm_grad[..., -1:]) * lambda_nrm
 51 |     return loss
 52 | 
 53 | ######################################################################################
 54 | # Computes the image gradient, useful for kd/ks smoothness losses
 55 | ######################################################################################
 56 | def image_grad(buf, std=0.01):
 57 |     t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / buf.shape[1], 1.0 - 1.0 / buf.shape[1], buf.shape[1], device="cuda"), 
 58 |                           torch.linspace(-1.0 + 1.0 / buf.shape[2], 1.0 - 1.0 / buf.shape[2], buf.shape[2], device="cuda"),
 59 |                           indexing='ij')
 60 |     tc   = torch.normal(mean=0, std=std, size=(buf.shape[0], buf.shape[1], buf.shape[2], 2), device="cuda") + torch.stack((s, t), dim=-1)[None, ...]
 61 |     tap  = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp')
 62 |     return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:]
 63 | 
 64 | ######################################################################################
 65 | # Computes the average edge length of a mesh. 
 66 | # Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients
 67 | ######################################################################################
 68 | def avg_edge_length(v_pos, t_pos_idx):
 69 |     e_pos_idx = mesh.compute_edges(t_pos_idx)
 70 |     edge_len  = util.length(v_pos[e_pos_idx[:, 0]] - v_pos[e_pos_idx[:, 1]])
 71 |     return torch.mean(edge_len)
 72 | 
 73 | ######################################################################################
 74 | # Laplacian regularization using umbrella operator (Fujiwara / Desbrun).
 75 | # https://mgarland.org/class/geom04/material/smoothing.pdf
 76 | ######################################################################################
 77 | def laplace_regularizer_const(v_pos, t_pos_idx):
 78 |     term = torch.zeros_like(v_pos)
 79 |     norm = torch.zeros_like(v_pos[..., 0:1])
 80 | 
 81 |     v0 = v_pos[t_pos_idx[:, 0], :]
 82 |     v1 = v_pos[t_pos_idx[:, 1], :]
 83 |     v2 = v_pos[t_pos_idx[:, 2], :]
 84 | 
 85 |     term.scatter_add_(0, t_pos_idx[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0))
 86 |     term.scatter_add_(0, t_pos_idx[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1))
 87 |     term.scatter_add_(0, t_pos_idx[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2))
 88 | 
 89 |     two = torch.ones_like(v0) * 2.0
 90 |     norm.scatter_add_(0, t_pos_idx[:, 0:1], two)
 91 |     norm.scatter_add_(0, t_pos_idx[:, 1:2], two)
 92 |     norm.scatter_add_(0, t_pos_idx[:, 2:3], two)
 93 | 
 94 |     term = term / torch.clamp(norm, min=1.0)
 95 | 
 96 |     return torch.mean(term**2)
 97 | 
 98 | ######################################################################################
 99 | # Smooth vertex normals
100 | ######################################################################################
def normal_consistency(v_pos, t_pos_idx):
    """Mean of ``(1 - cos(angle)) / 2`` between the normals of faces sharing an edge.

    Face pairs come from ``mesh.compute_edge_to_face_mapping(t_pos_idx)``.
    """
    # Compute face normals
    v0 = v_pos[t_pos_idx[:, 0], :]
    v1 = v_pos[t_pos_idx[:, 1], :]
    v2 = v_pos[t_pos_idx[:, 2], :]

    # dim=-1 is required: without it torch.cross uses the first axis of size 3,
    # which is the face axis whenever the mesh has exactly three triangles.
    face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0, dim=-1))

    tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx)

    # Fetch normals for both faces sharing an edge
    n0 = face_normals[tris_per_edge[:, 0], :]
    n1 = face_normals[tris_per_edge[:, 1], :]

    # Compute error metric based on normal difference
    term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
    term = (1.0 - term) * 0.5

    return torch.mean(torch.abs(term))
120 | 


--------------------------------------------------------------------------------
/render/renderutils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith
11 | __all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ]
12 | 


--------------------------------------------------------------------------------
/render/renderutils/bsdf.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  2 | #
  3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  4 | # property and proprietary rights in and to this material, related
  5 | # documentation and any modifications thereto. Any use, reproduction, 
  6 | # disclosure or distribution of this material and related documentation 
  7 | # without an express license agreement from NVIDIA CORPORATION or 
  8 | # its affiliates is strictly prohibited.
  9 | 
 10 | import math
 11 | import torch
 12 | 
 13 | NORMAL_THRESHOLD = 0.1
 14 | 
 15 | ################################################################################
 16 | # Vector utility functions
 17 | ################################################################################
 18 | 
 19 | def _dot(x, y):
 20 |     return torch.sum(x*y, -1, keepdim=True)
 21 | 
 22 | def _reflect(x, n):
 23 |     return 2*_dot(x, n)*n - x
 24 | 
 25 | def _safe_normalize(x):
 26 |     return torch.nn.functional.normalize(x, dim = -1)
 27 | 
 28 | def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading):
 29 |     # Swap normal direction for backfacing surfaces
 30 |     if two_sided_shading:
 31 |         smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm)
 32 |         geom_nrm   = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm)
 33 | 
 34 |     t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1)
 35 |     return torch.lerp(geom_nrm, smooth_nrm, t)
 36 | 
 37 | 
 38 | def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl):
 39 |     smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm))
 40 |     if opengl:
 41 |         shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
 42 |     else:
 43 |         shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
 44 |     return _safe_normalize(shading_nrm)
 45 | 
 46 | def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
 47 |     smooth_nrm = _safe_normalize(smooth_nrm)
 48 |     smooth_tng = _safe_normalize(smooth_tng)
 49 |     view_vec   = _safe_normalize(view_pos - pos)
 50 |     shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl)
 51 |     return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading)
 52 | 
 53 | ################################################################################
 54 | # Simple lambertian diffuse BSDF
 55 | ################################################################################
 56 | 
 57 | def bsdf_lambert(nrm, wi):
 58 |     return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi
 59 | 
 60 | ################################################################################
 61 | # Frostbite diffuse
 62 | ################################################################################
 63 | 
 64 | def bsdf_frostbite(nrm, wi, wo, linearRoughness):
 65 |     wiDotN = _dot(wi, nrm)
 66 |     woDotN = _dot(wo, nrm)
 67 | 
 68 |     h = _safe_normalize(wo + wi)
 69 |     wiDotH = _dot(wi, h)
 70 | 
 71 |     energyBias = 0.5 * linearRoughness
 72 |     energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness
 73 |     f90 = energyBias + 2.0 * wiDotH * wiDotH * linearRoughness
 74 |     f0 = 1.0
 75 | 
 76 |     wiScatter = bsdf_fresnel_shlick(f0, f90, wiDotN)
 77 |     woScatter = bsdf_fresnel_shlick(f0, f90, woDotN)
 78 |     res = wiScatter * woScatter * energyFactor
 79 |     return torch.where((wiDotN > 0.0) & (woDotN > 0.0), res, torch.zeros_like(res))
 80 | 
 81 | ################################################################################
 82 | # Phong specular, loosely based on mitsuba implementation
 83 | ################################################################################
 84 | 
 85 | def bsdf_phong(nrm, wo, wi, N):
 86 |     dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0)
 87 |     dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0)
 88 |     return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi)
 89 | 
 90 | ################################################################################
 91 | # PBR's implementation of GGX specular
 92 | ################################################################################
 93 | 
 94 | specular_epsilon = 1e-4
 95 | 
 96 | def bsdf_fresnel_shlick(f0, f90, cosTheta):
 97 |     _cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
 98 |     return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0
 99 | 
def bsdf_ndf_ggx(alphaSqr, cosTheta):
    """GGX normal distribution: ``alphaSqr / (pi * d^2)``, ``d = cos^2 * (alphaSqr - 1) + 1``.

    ``cosTheta`` is clamped to ``[specular_epsilon, 1 - specular_epsilon]`` first.
    """
    cos_clamped = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
    denom_root = (cos_clamped * alphaSqr - cos_clamped) * cos_clamped + 1
    return alphaSqr / (denom_root * denom_root * math.pi)
104 | 
def bsdf_lambda_ggx(alphaSqr, cosTheta):
    """GGX Lambda term: ``0.5 * (sqrt(1 + alphaSqr * tan^2(theta)) - 1)``.

    ``cosTheta`` is clamped to ``[specular_epsilon, 1 - specular_epsilon]`` first.
    """
    cos_clamped = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
    cos_sqr = cos_clamped * cos_clamped
    tan_sqr = (1.0 - cos_sqr) / cos_sqr
    return 0.5 * (torch.sqrt(1 + alphaSqr * tan_sqr) - 1.0)
111 | 
def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO):
    """Height-correlated Smith masking term: ``1 / (1 + Lambda(i) + Lambda(o))``."""
    lambda_in = bsdf_lambda_ggx(alphaSqr, cosThetaI)
    lambda_out = bsdf_lambda_ggx(alphaSqr, cosThetaO)
    denominator = 1 + lambda_in + lambda_out
    return 1 / denominator
116 | 
def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08):
    """GGX specular term ``F * D * G / (4 * dot(wo, n))``.

    ``alpha`` is clamped to ``[min_roughness^2, 1]``. The result is zero wherever
    ``wo`` or ``wi`` is not above the surface by more than ``specular_epsilon``.
    """
    alpha_clamped = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0)
    alpha_sqr = alpha_clamped * alpha_clamped

    half_vec = _safe_normalize(wo + wi)
    wo_dot_n = _dot(wo, nrm)
    wi_dot_n = _dot(wi, nrm)
    wo_dot_h = _dot(wo, half_vec)
    n_dot_h = _dot(nrm, half_vec)

    distribution = bsdf_ndf_ggx(alpha_sqr, n_dot_h)
    masking = bsdf_masking_smith_ggx_correlated(alpha_sqr, wo_dot_n, wi_dot_n)
    fresnel = bsdf_fresnel_shlick(col, 1, wo_dot_h)

    weight = fresnel * distribution * masking * 0.25 / torch.clamp(wo_dot_n, min=specular_epsilon)

    front_facing = (wo_dot_n > specular_epsilon) & (wi_dot_n > specular_epsilon)
    return torch.where(front_facing, weight, torch.zeros_like(weight))
135 | 
def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF):
    """Diffuse plus GGX specular response for one view / light position pair.

    ``arm`` carries, per channel, the specular strength (x), roughness (y) and
    metalness (z). ``BSDF == 0`` selects the Lambert diffuse term; any other
    value selects the Frostbite one.
    """
    wo = _safe_normalize(view_pos - pos)
    wi = _safe_normalize(light_pos - pos)

    spec_str, roughness, metallic = (arm[..., c:c + 1] for c in range(3))
    specular_col = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str)
    diffuse_col = kd * (1.0 - metallic)

    diffuse_lobe = bsdf_lambert(nrm, wi) if BSDF == 0 else bsdf_frostbite(nrm, wi, wo, roughness)
    diffuse = diffuse_col * diffuse_lobe
    specular = bsdf_pbr_specular(specular_col, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness)
    return diffuse + specular
152 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/bsdf.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | 
14 | #include "common.h"
15 | 
// Argument bundle for the Lambert kernel. Field names mirror bsdf_lambert(nrm, wi)
// in bsdf.py; `out` presumably receives the result (confirm in the kernel source).
struct LambertKernelParams
{
    Tensor  nrm;
    Tensor  wi;
    Tensor  out;
    dim3    gridSize;   // launch extent
};
23 | 
// Argument bundle for the Frostbite diffuse kernel. Field names mirror
// bsdf_frostbite(nrm, wi, wo, linearRoughness) in bsdf.py; `out` presumably
// receives the result (confirm in the kernel source).
struct FrostbiteDiffuseKernelParams
{
    Tensor  nrm;
    Tensor  wi;
    Tensor  wo;
    Tensor  linearRoughness;
    Tensor  out;
    dim3    gridSize;   // launch extent
};
33 | 
// Argument bundle for the Schlick Fresnel kernel. Field names mirror
// bsdf_fresnel_shlick(f0, f90, cosTheta) in bsdf.py; `out` presumably receives
// the result (confirm in the kernel source).
struct FresnelShlickKernelParams
{
    Tensor  f0;
    Tensor  f90;
    Tensor  cosTheta;
    Tensor  out;
    dim3    gridSize;   // launch extent
};
42 | 
// Argument bundle for the GGX distribution / lambda kernels. Field names mirror
// bsdf_ndf_ggx(alphaSqr, cosTheta) in bsdf.py; `out` presumably receives the
// result (confirm in the kernel source).
struct NdfGGXParams
{
    Tensor  alphaSqr;
    Tensor  cosTheta;
    Tensor  out;
    dim3    gridSize;   // launch extent
};
50 | 
// Argument bundle for the Smith masking kernel. Field names mirror
// bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO) in bsdf.py;
// `out` presumably receives the result (confirm in the kernel source).
struct MaskingSmithParams
{
    Tensor  alphaSqr;
    Tensor  cosThetaI;
    Tensor  cosThetaO;
    Tensor  out;
    dim3    gridSize;   // launch extent
};
59 | 
// Argument bundle for the PBR specular kernel. Field names mirror
// bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness) in bsdf.py; `out`
// presumably receives the result (confirm in the kernel source).
struct PbrSpecular
{
    Tensor  col;
    Tensor  nrm;
    Tensor  wo;
    Tensor  wi;
    Tensor  alpha;
    Tensor  out;
    dim3    gridSize;       // launch extent
    float   min_roughness;
};
71 | 
// Argument bundle for the full PBR BSDF kernel. Field names mirror
// bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF) in
// bsdf.py; `out` presumably receives the result (confirm in the kernel source).
struct PbrBSDF
{
    Tensor  kd;
    Tensor  arm;
    Tensor  pos;
    Tensor  nrm;
    Tensor  view_pos;
    Tensor  light_pos;
    Tensor  out;
    dim3    gridSize;       // launch extent
    float   min_roughness;
    int     BSDF;           // diffuse model selector; in bsdf.py 0 selects Lambert
};
85 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/common.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #include <cuda_runtime.h>
13 | #include <algorithm>
14 | 
15 | //------------------------------------------------------------------------
16 | // Block and grid size calculators for kernel launches.
17 | 
// Picks a 2D thread-block size of at most maxWidth x maxHeight for a buffer of
// extent dims.x x dims.y. Returns (1, 1, 1) when either the thread budget or the
// buffer area is at most one; dims.z is ignored and the block depth is always 1.
dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims)
{
    int maxThreads = maxWidth * maxHeight;
    if (maxThreads <= 1 || (dims.x * dims.y) <= 1)
        return dim3(1, 1, 1); // Degenerate.

    // Start from max size.
    int bw = maxWidth;
    int bh = maxHeight;

    // Optimizations for weirdly sized buffers.
    if (dims.x < bw)
    {
        // Decrease block width to smallest power of two that covers the buffer width.
        // (Halving only yields powers of two if maxWidth itself is one.)
        while ((bw >> 1) >= dims.x)
            bw >>= 1;

        // Maximize height.
        bh = maxThreads / bw;
        if (bh > dims.y)
            bh = dims.y;
    }
    else if (dims.y < bh)
    {
        // Halve height and double width until fits completely inside buffer vertically.
        while (bh > dims.y)
        {
            bh >>= 1;
            if (bw < dims.x)
                bw <<= 1;
        }
    }

    // Done.
    return dim3(bw, bh, 1);
}
54 | 
55 | // Returns the (x, y, z) size of a sub-block of blockSize that can be reduced
56 | // with horizontal SIMD operations (e.g. __shfl_xor_sync). Each component is
57 | // capped at 32.
58 | //
59 | // NOTE(review): divides by blockSize.x and by blockSize.x * blockSize.y, so
60 | // both must be non-zero.
61 | dim3 getWarpSize(dim3 blockSize)
62 | {
63 |     const unsigned int kLanes = 32u;
64 | 
65 |     // x: as many lanes as fit in one block row.
66 |     const unsigned int sizeX = std::min(blockSize.x, kLanes);
67 | 
68 |     // y: rows that still fit next to each other in 32 lanes (at least one),
69 |     // limited by the block height.
70 |     const unsigned int rowsInLanes = std::max(kLanes / blockSize.x, 1u);
71 |     const unsigned int sizeY = std::min(rowsInLanes, std::min(kLanes, blockSize.y));
72 | 
73 |     // z: same idea for whole xy-slices, limited by the block depth.
74 |     const unsigned int slicesInLanes = std::max(kLanes / (blockSize.x * blockSize.y), 1u);
75 |     const unsigned int sizeZ = std::min(slicesInLanes, std::min(kLanes, blockSize.z));
76 | 
77 |     return dim3(sizeX, sizeY, sizeZ);
78 | }
64 | 
65 | // Returns, per axis, the number of blocks of blockSize needed to cover dims,
66 | // i.e. dims / blockSize rounded up.
67 | //
68 | // NOTE(review): computed as (dims - 1) / blockSize + 1 on dim3 components.
69 | // If those are unsigned (as in CUDA's dim3), an extent of 0 wraps around
70 | // instead of yielding 0 blocks -- callers should not pass empty extents.
71 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims)
72 | {
73 |     return dim3(
74 |         (dims.x - 1) / blockSize.x + 1,
75 |         (dims.y - 1) / blockSize.y + 1,
76 |         (dims.z - 1) / blockSize.z + 1
77 |     );
78 | }
73 | 
74 | //------------------------------------------------------------------------
75 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | #include <cuda.h>
14 | #include <stdint.h>
15 | 
16 | #include "vec3f.h"
17 | #include "vec4f.h"
18 | #include "tensor.h"
19 | 
20 | dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims); // Block shape for a launch over dims, derived from a maxWidth x maxHeight starting size (defined in common.cpp).
21 | dim3 getLaunchGridSize(dim3 blockSize, dim3 dims); // Per-axis number of blocks of blockSize needed to cover dims (defined in common.cpp).
22 | 
23 | #ifdef __CUDACC__
24 | 
25 | #ifdef _MSC_VER
26 | #define M_PI 3.14159265358979323846f
27 | #endif
28 | 
29 | // Inline host/device version of getWarpSize(), used when this header is
30 | // compiled by nvcc (__CUDACC__). Returns the (x, y, z) size of the sub-block
31 | // of blockSize that fits in 32 lanes; each component is capped at 32.
32 | //
33 | // NOTE(review): divides by blockSize.x and by blockSize.x * blockSize.y, so
34 | // both must be non-zero.
35 | __host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
36 | {
37 |     // x: lanes that fit in one block row.
38 |     const unsigned int sizeX = min(blockSize.x, 32u);
39 | 
40 |     // y: rows that fit in 32 lanes (at least one), limited by the block height.
41 |     const unsigned int sizeY = min(max(32u / blockSize.x, 1u), min(32u, blockSize.y));
42 | 
43 |     // z: whole xy-slices that fit in 32 lanes (at least one), limited by the block depth.
44 |     const unsigned int sizeZ = min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z));
45 | 
46 |     return dim3(sizeX, sizeY, sizeZ);
47 | }
37 | 
38 | // Limits val to the closed range [mn, mx]: first raised to at least mn,
39 | // then lowered to at most mx.
40 | __device__ static inline float clamp(float val, float mn, float mx)
41 | {
42 |     const float raised = max(val, mn);
43 |     return min(raised, mx);
44 | }
39 | #else
40 | dim3 getWarpSize(dim3 blockSize); // Non-nvcc translation units only see this prototype; the out-of-line definition lives in common.cpp.
41 | #endif


--------------------------------------------------------------------------------
/render/renderutils/c_src/cubemap.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | 
14 | #include "common.h"
15 | 
16 | struct DiffuseCubemapKernelParams // Parameter bundle for the diffuse cubemap kernels (not visible in this header); field meanings inferred from names -- TODO confirm.
17 | {
18 |     Tensor  cubemap;  // presumably the input environment cubemap -- TODO confirm layout
19 |     Tensor  out;      // result tensor of the kernel
20 |     dim3    gridSize; // extent of the launch domain; presumably used for per-thread bounds checks -- TODO confirm
21 | };
22 | 
23 | struct SpecularCubemapKernelParams // Parameter bundle for the specular cubemap kernels (not visible in this header); field meanings inferred from names -- TODO confirm.
24 | {
25 |     Tensor  cubemap;         // presumably the input environment cubemap -- TODO confirm layout
26 |     Tensor  bounds;          // presumably the output of the specular-bounds kernel (see SpecularBoundsKernelParams) -- TODO confirm
27 |     Tensor  out;             // result tensor of the kernel
28 |     dim3    gridSize;        // extent of the launch domain; presumably used for per-thread bounds checks -- TODO confirm
29 |     float   costheta_cutoff; // scalar passed by value; by its name a cosine threshold for contributing directions -- TODO confirm
30 |     float   roughness;       // scalar passed by value; by its name the roughness being filtered for -- TODO confirm
31 | };
32 | 
33 | struct SpecularBoundsKernelParams // Parameter bundle for the specular-bounds kernel (not visible in this header); field meanings inferred from names -- TODO confirm.
34 | {
35 |     float   costheta_cutoff; // scalar passed by value; by its name a cosine threshold -- TODO confirm
36 |     Tensor  out;             // result tensor of the kernel
37 |     dim3    gridSize;        // extent of the launch domain; presumably used for per-thread bounds checks -- TODO confirm
38 | };
39 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/loss.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  3 |  *
  4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  5 |  * property and proprietary rights in and to this material, related 
  6 |  * documentation and any modifications thereto. Any use, reproduction, 
  7 |  * disclosure or distribution of this material and related documentation
  8 |  * without an express license agreement from NVIDIA CORPORATION or 
  9 |  * its affiliates is strictly prohibited.
 10 |  */
 11 | 
 12 | #include <cuda.h>
 13 | 
 14 | #include "common.h"
 15 | #include "loss.h"
 16 | 
 17 | //------------------------------------------------------------------------
 18 | // Utils
 19 | 
 20 | // Derivative of |x| with respect to x: -1 for negative x, +1 for positive x,
 21 | // and 0 exactly at x == 0.
 22 | __device__ inline float bwdAbs(float x)
 23 | {
 24 |     if (x == 0.0f)
 25 |         return 0.0f;
 26 |     return (x < 0.0f) ? -1.0f : 1.0f;
 27 | }
 21 | 
 22 | // Adds up val across the 32 lanes of the calling warp using XOR shuffles at
 23 | // lane distances 1, 2, 4, 8 and 16. The full mask 0xFFFFFFFF means every lane
 24 | // of the warp has to execute this call.
 25 | __device__ float warpSum(float val)
 26 | {
 27 |     for (int laneDistance = 1; laneDistance < 32; laneDistance <<= 1)
 28 |     {
 29 |         const float partner = __shfl_xor_sync(0xFFFFFFFF, val, laneDistance);
 30 |         val += partner;
 31 |     }
 32 |     return val;
 33 | }
 27 | 
 28 | //------------------------------------------------------------------------
 29 | // Tonemapping
 30 | 
 31 | // Linear -> sRGB transfer curve for one channel: a power segment above the
 32 | // 0.0031308 knee and a linear segment (slope 12.92) below it. Negative input
 33 | // is clamped to 0 on the linear segment.
 34 | __device__ inline float fwdSRGB(float x)
 35 | {
 36 |     if (x > 0.0031308f)
 37 |         return powf(max(x, 0.0031308f), 1.0f / 2.4f) * 1.055f - 0.055f;
 38 | 
 39 |     return 12.92f * max(x, 0.0f);
 40 | }
 35 | 
 36 | // Backward pass of fwdSRGB: accumulates d_out * d(fwdSRGB)/dx into d_x.
 37 | //
 38 | //   x      input the forward pass was evaluated at
 39 | //   d_x    gradient accumulator for x (added to, not overwritten)
 40 | //   d_out  incoming gradient of the forward result
 41 | //
 42 | // Nothing is added for x <= 0, where the forward pass clamps to a constant.
 43 | __device__ inline void bwdSRGB(float x, float &d_x, float d_out)
 44 | {
 45 |     if (x > 0.0031308f)
 46 |     {
 47 |         // Power segment: 1.055 / 2.4 * x^(1/2.4 - 1).
 48 |         d_x += d_out * 0.439583f / powf(x, 0.583333f);
 49 |         return;
 50 |     }
 51 | 
 52 |     if (x > 0.0f)
 53 |     {
 54 |         // Linear segment: constant slope.
 55 |         d_x += d_out * 12.92f;
 56 |     }
 57 | }
 43 | 
 44 | // Tonemaps each channel independently as fwdSRGB(log(x + 1)).
 45 | __device__ inline vec3f fwdTonemapLogSRGB(vec3f x)
 46 | {
 47 |     const float r = fwdSRGB(logf(x.x + 1.0f));
 48 |     const float g = fwdSRGB(logf(x.y + 1.0f));
 49 |     const float b = fwdSRGB(logf(x.z + 1.0f));
 50 |     return vec3f(r, g, b);
 51 | }
 48 | 
 49 | // One-channel backward pass of fwdSRGB(log(x + 1)): adds the gradient with
 50 | // respect to x to d_x. Inputs outside the open range (0, 65535) receive no
 51 | // gradient.
 52 | __device__ inline void bwdTonemapLogSRGBChannel(float x, float& d_x, float d_out)
 53 | {
 54 |     if (x > 0.0f && x < 65535.0f)
 55 |     {
 56 |         // Gradient w.r.t. log(x + 1), collected in a scratch variable so the
 57 |         // chain-rule factor below only scales this call's contribution.
 58 |         float d_log = 0.0f;
 59 |         bwdSRGB(logf(x + 1.0f), d_log, d_out);
 60 | 
 61 |         // d/dx log(x + 1) = 1 / (x + 1).
 62 |         d_x += d_log * (1 / (x + 1.0f));
 63 |     }
 64 | }
 65 | 
 66 | // Backward pass of fwdTonemapLogSRGB.
 67 | //
 68 | //   x      input the forward pass was evaluated at
 69 | //   d_x    gradient accumulator for x (added to, per channel)
 70 | //   d_out  incoming gradient of the tonemapped value
 71 | //
 72 | // Previously the 1 / (x + 1) factor was applied with `*=` to the whole
 73 | // accumulator, which also rescaled any gradient already stored in d_x. The
 74 | // result is unchanged for a zero-initialised d_x (the way the kernels in
 75 | // this file call it), and now accumulates correctly otherwise.
 76 | __device__ inline void bwdTonemapLogSRGB(vec3f x, vec3f& d_x, vec3f d_out)
 77 | {
 78 |     bwdTonemapLogSRGBChannel(x.x, d_x.x, d_out.x);
 79 |     bwdTonemapLogSRGBChannel(x.y, d_x.y, d_out.y);
 80 |     bwdTonemapLogSRGBChannel(x.z, d_x.z, d_out.z);
 81 | }
 67 | 
 68 | // Relative MSE for one channel: squared difference divided by
 69 | // (img^2 + target^2 + eps). eps keeps the denominator away from zero.
 70 | __device__ inline float fwdRELMSE(float img, float target, float eps = 0.1f)
 71 | {
 72 |     const float diff = img - target;
 73 |     const float scale = img * img + target * target + eps;
 74 |     return diff * diff / scale;
 75 | }
 72 | 
 73 | // Backward pass of fwdRELMSE: accumulates the gradients with respect to img
 74 | // and target into d_img and d_target.
 75 | //
 76 | //   img, target  values the forward pass was evaluated at
 77 | //   d_img        gradient accumulator for img (added to)
 78 | //   d_target     gradient accumulator for target (subtracted from)
 79 | //   d_out        incoming gradient of the loss value
 80 | //   eps          same stabiliser as in the forward pass
 81 | __device__ inline void bwdRELMSE(float img, float target, float &d_img, float &d_target, float d_out, float eps = 0.1f)
 82 | {
 83 |     const float denom = (target * target + img * img + eps);
 84 |     const float denomSq = denom * denom;
 85 | 
 86 |     // Factor shared by both partial derivatives.
 87 |     const float scaledDiff = d_out * 2 * (img - target);
 88 | 
 89 |     d_img    += scaledDiff * (target * (target + img) + eps) / denomSq;
 90 |     d_target -= scaledDiff * (img * (target + img) + eps) / denomSq;
 91 | }
 79 | 
 80 | // SMAPE-style loss for one channel: |img - target| / (img + target + eps).
 81 | // NOTE(review): the denominator is only positive for non-negative inputs;
 82 | // the forward kernel in this file clamps both to [0, 65535] before calling.
 83 | __device__ inline float fwdSMAPE(float img, float target, float eps=0.01f)
 84 | {
 85 |     const float absDiff = abs(img - target);
 86 |     const float scale = img + target + eps;
 87 |     return absDiff / scale;
 88 | }
 84 | 
 85 | // Backward pass of fwdSMAPE: accumulates the gradients with respect to img
 86 | // and target into d_img and d_target.
 87 | //
 88 | //   img, target  values the forward pass was evaluated at
 89 | //   d_img        gradient accumulator for img (added to)
 90 | //   d_target     gradient accumulator for target (subtracted from)
 91 | //   d_out        incoming gradient of the loss value
 92 | //   eps          same stabiliser as in the forward pass
 93 | __device__ inline void bwdSMAPE(float img, float target, float& d_img, float& d_target, float d_out, float eps = 0.01f)
 94 | {
 95 |     const float denom = (target + img + eps);
 96 |     const float denomSq = denom * denom;
 97 | 
 98 |     // Incoming gradient multiplied by the sign of the difference (0 when equal).
 99 |     const float signedOut = d_out * bwdAbs(img - target);
100 | 
101 |     d_img    += signedOut * (2 * target + eps) / denomSq;
102 |     d_target -= signedOut * (2 * img + eps) / denomSq;
103 | }
 91 | 
 92 | //------------------------------------------------------------------------
 93 | // Kernels
 94 | 
 95 | // Forward image-loss kernel.
 96 | //
 97 | // One thread per pixel: x/y come from the block and thread indices, z from
 98 | // blockIdx.z. Each in-bounds thread clamps img and target to [0, 65535],
 99 | // optionally tonemaps them, evaluates the selected loss per channel and
100 | // averages the three channels. The per-thread values are then summed with
101 | // warpSum(), and one thread per getWarpSize() tile writes the sum to p.out
102 | // at the tile's coordinates.
103 | __global__ void imgLossFwdKernel(LossKernelParams p)
104 | {
105 |     // Pixel handled by this thread.
106 |     const unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
107 |     const unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
108 |     const unsigned int pz = blockIdx.z;
109 |     const bool inBounds = px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z;
110 | 
111 |     // Out-of-bounds threads must not return early: they still take part in
112 |     // the warp reduction below and contribute 0.
113 |     float floss = 0.0f;
114 |     if (inBounds)
115 |     {
116 |         const vec3f rawImg = p.img.fetch3(px, py, pz);
117 |         const vec3f rawTarget = p.target.fetch3(px, py, pz);
118 | 
119 |         vec3f img(clamp(rawImg.x, 0.0f, 65535.0f), clamp(rawImg.y, 0.0f, 65535.0f), clamp(rawImg.z, 0.0f, 65535.0f));
120 |         vec3f target(clamp(rawTarget.x, 0.0f, 65535.0f), clamp(rawTarget.y, 0.0f, 65535.0f), clamp(rawTarget.z, 0.0f, 65535.0f));
121 | 
122 |         if (p.tonemapper == TONEMAPPER_LOG_SRGB)
123 |         {
124 |             img = fwdTonemapLogSRGB(img);
125 |             target = fwdTonemapLogSRGB(target);
126 |         }
127 | 
128 |         vec3f vloss(0);
129 |         switch (p.loss)
130 |         {
131 |         case LOSS_MSE:
132 |             vloss = (img - target) * (img - target);
133 |             break;
134 |         case LOSS_RELMSE:
135 |             vloss = vec3f(fwdRELMSE(img.x, target.x), fwdRELMSE(img.y, target.y), fwdRELMSE(img.z, target.z));
136 |             break;
137 |         case LOSS_SMAPE:
138 |             vloss = vec3f(fwdSMAPE(img.x, target.x), fwdSMAPE(img.y, target.y), fwdSMAPE(img.z, target.z));
139 |             break;
140 |         default:
141 |             // Every other value, LOSS_L1 included, is treated as L1.
142 |             vloss = vec3f(abs(img.x - target.x), abs(img.y - target.y), abs(img.z - target.z));
143 |             break;
144 |         }
145 | 
146 |         // Mean over the three channels.
147 |         floss = sum(vloss) / 3.0f;
148 |     }
149 | 
150 |     floss = warpSum(floss);
151 | 
152 |     // Only the first thread of each tile stores the reduced value.
153 |     const dim3 tile = getWarpSize(blockDim);
154 |     const bool tileLeader = threadIdx.x % tile.x == 0 && threadIdx.y % tile.y == 0 && threadIdx.z % tile.z == 0;
155 |     if (inBounds && tileLeader)
156 |         p.out.store(px / tile.x, py / tile.y, pz / tile.z, floss);
157 | }
136 | 
137 | // Backward image-loss kernel.
138 | //
139 | // One thread per pixel (x/y from block and thread indices, z from blockIdx.z).
140 | // Reads the incoming gradient from p.out at the coordinates of the
141 | // getWarpSize() tile the pixel belongs to (the forward kernel stores one value
142 | // per tile), recomputes the forward inputs of the loss, and writes the
143 | // gradients with respect to img and target through store_grad.
144 | //
145 | // Changes relative to the previous version:
146 | //  * The MSE gradient of the z channel used d_vloss.x instead of d_vloss.z.
147 | //    All three components are equal, so the result was unaffected, but the
148 | //    typo is fixed.
149 | //  * The forward recomputation now clamps img and target to [0, 65535] exactly
150 | //    like imgLossFwdKernel does, so gradients are evaluated at the values
151 | //    the forward loss actually saw when an input is out of range.
152 | __global__ void imgLossBwdKernel(LossKernelParams p)
153 | {
154 |     // Calculate pixel position.
155 |     unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
156 |     unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
157 |     unsigned int pz = blockIdx.z;
158 | 
159 |     if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
160 |         return;
161 | 
162 |     dim3 warpSize = getWarpSize(blockDim);
163 | 
164 |     vec3f _img = p.img.fetch3(px, py, pz);
165 |     vec3f _target = p.target.fetch3(px, py, pz);
166 |     float d_out = p.out.fetch1(px / warpSize.x, py / warpSize.y, pz / warpSize.z);
167 | 
168 |     /////////////////////////////////////////////////////////////////////
169 |     // FWD
170 | 
171 |     // Same clamp as in imgLossFwdKernel.
172 |     vec3f img = vec3f(clamp(_img.x, 0.0f, 65535.0f), clamp(_img.y, 0.0f, 65535.0f), clamp(_img.z, 0.0f, 65535.0f));
173 |     vec3f target = vec3f(clamp(_target.x, 0.0f, 65535.0f), clamp(_target.y, 0.0f, 65535.0f), clamp(_target.z, 0.0f, 65535.0f));
174 |     if (p.tonemapper == TONEMAPPER_LOG_SRGB)
175 |     {
176 |         img = fwdTonemapLogSRGB(img);
177 |         target = fwdTonemapLogSRGB(target);
178 |     }
179 | 
180 |     /////////////////////////////////////////////////////////////////////
181 |     // BWD
182 | 
183 |     // The forward pass averages the three channels.
184 |     vec3f d_vloss = vec3f(d_out, d_out, d_out) / 3.0f;
185 | 
186 |     vec3f d_img(0), d_target(0);
187 |     if (p.loss == LOSS_MSE)
188 |     {
189 |         d_img = vec3f(d_vloss.x * 2 * (img.x - target.x), d_vloss.y * 2 * (img.y - target.y), d_vloss.z * 2 * (img.z - target.z));
190 |         d_target = -d_img;
191 |     }
192 |     else if (p.loss == LOSS_RELMSE)
193 |     {
194 |         bwdRELMSE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
195 |         bwdRELMSE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
196 |         bwdRELMSE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
197 |     }
198 |     else if (p.loss == LOSS_SMAPE)
199 |     {
200 |         bwdSMAPE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
201 |         bwdSMAPE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
202 |         bwdSMAPE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
203 |     }
204 |     else
205 |     {
206 |         // Every other value, LOSS_L1 included, is treated as L1.
207 |         d_img = d_vloss * vec3f(bwdAbs(img.x - target.x), bwdAbs(img.y - target.y), bwdAbs(img.z - target.z));
208 |         d_target = -d_img;
209 |     }
210 | 
211 |     // Chain through the tonemapper, evaluated at the raw (pre-tonemap) inputs.
212 |     if (p.tonemapper == TONEMAPPER_LOG_SRGB)
213 |     {
214 |         vec3f d__img(0), d__target(0);
215 |         bwdTonemapLogSRGB(_img, d__img, d_img);
216 |         bwdTonemapLogSRGB(_target, d__target, d_target);
217 |         d_img = d__img; d_target = d__target;
218 |     }
219 | 
220 |     // Backward of the clamp: components at or outside the clamp limits get no gradient.
221 |     if (_img.x <= 0.0f || _img.x >= 65535.0f) d_img.x = 0;
222 |     if (_img.y <= 0.0f || _img.y >= 65535.0f) d_img.y = 0;
223 |     if (_img.z <= 0.0f || _img.z >= 65535.0f) d_img.z = 0;
224 |     if (_target.x <= 0.0f || _target.x >= 65535.0f) d_target.x = 0;
225 |     if (_target.y <= 0.0f || _target.y >= 65535.0f) d_target.y = 0;
226 |     if (_target.z <= 0.0f || _target.z >= 65535.0f) d_target.z = 0;
227 | 
228 |     p.img.store_grad(px, py, pz, d_img);
229 |     p.target.store_grad(px, py, pz, d_target);
230 | }


--------------------------------------------------------------------------------
/render/renderutils/c_src/loss.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | 
14 | #include "common.h"
15 | 
16 | enum TonemapperType // Selects the tonemapping applied to img and target before the loss is evaluated (see the kernels in loss.cu).
17 | {
18 |     TONEMAPPER_NONE = 0,    // inputs are used as they are (after clamping)
19 |     TONEMAPPER_LOG_SRGB = 1 // each channel is mapped through fwdSRGB(log(x + 1))
20 | };
21 | 
22 | enum LossType // Selects the per-channel loss evaluated by the kernels in loss.cu.
23 | {
24 |     LOSS_L1 = 0,     // absolute difference; also the fallback branch for any unlisted value
25 |     LOSS_MSE = 1,    // squared difference
26 |     LOSS_RELMSE = 2, // squared difference divided by (img^2 + target^2 + eps)
27 |     LOSS_SMAPE = 3   // absolute difference divided by (img + target + eps)
28 | };
29 | 
30 | struct LossKernelParams // Parameter bundle passed by value to imgLossFwdKernel / imgLossBwdKernel.
31 | {
32 |     Tensor          img;        // image being evaluated; read with fetch3, gradient written with store_grad
33 |     Tensor          target;     // reference image; read with fetch3, gradient written with store_grad
34 |     Tensor          out;        // forward: one reduced loss value per warp tile is stored here; backward: the incoming gradient is read from it
35 |     dim3            gridSize;   // pixel-domain extent that thread coordinates are bounds-checked against
36 |     TonemapperType  tonemapper; // tonemapping applied before the loss
37 |     LossType        loss;       // which loss to evaluate
38 | };
39 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/mesh.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #include <cuda.h>
13 | #include <stdio.h>
14 | 
15 | #include "common.h"
16 | #include "mesh.h"
17 | 
18 | 
19 | //------------------------------------------------------------------------
20 | // Kernels
21 | 
22 | // Forward kernel: multiplies every entry of p.points by a per-batch 4x4 matrix.
23 | //
24 | // One thread per point along x; the z block/thread index selects the batch
25 | // entry. With p.isPoints set, the mtx[3][*] terms are added and four output
26 | // channels are written; otherwise those terms are left out and three
27 | // channels are written.
28 | __global__ void xfmPointsFwdKernel(XfmKernelParams p)
29 | {
30 |     const unsigned int pointIdx = blockIdx.x * blockDim.x + threadIdx.x;
31 |     const unsigned int batchIdx = blockIdx.z * blockDim.z + threadIdx.z;
32 | 
33 |     // The first 16 threads of the block stage the matrix in shared memory,
34 |     // one element each, stored with the two fetch indices swapped.
35 |     // NOTE(review): this needs blockDim.x >= 16 -- confirm at the launch site.
36 |     __shared__ float mtx[4][4];
37 |     if (threadIdx.x < 16)
38 |     {
39 |         const unsigned int row = threadIdx.x / 4;
40 |         const unsigned int col = threadIdx.x % 4;
41 |         mtx[col][row] = p.matrix.fetch(p.matrix.nhwcIndex(batchIdx, row, col, 0));
42 |     }
43 |     __syncthreads();
44 | 
45 |     // Surplus threads may only leave after the barrier above.
46 |     if (pointIdx >= p.gridSize.x)
47 |         return;
48 | 
49 |     const vec3f pos(
50 |         p.points.fetch(p.points.nhwcIndex(batchIdx, pointIdx, 0, 0)),
51 |         p.points.fetch(p.points.nhwcIndex(batchIdx, pointIdx, 1, 0)),
52 |         p.points.fetch(p.points.nhwcIndex(batchIdx, pointIdx, 2, 0))
53 |     );
54 | 
55 |     const int numChannels = p.isPoints ? 4 : 3;
56 |     for (int c = 0; c < numChannels; ++c)
57 |     {
58 |         float value = pos.x * mtx[0][c] + pos.y * mtx[1][c] + pos.z * mtx[2][c];
59 |         if (p.isPoints)
60 |             value += mtx[3][c];
61 |         p.out.store(p.out.nhwcIndex(batchIdx, pointIdx, c, 0), value);
62 |     }
63 | }
55 | 
56 | __global__ void xfmPointsBwdKernel(XfmKernelParams p)
57 | { 
58 |     unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
59 |     unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;
60 | 
61 |     __shared__ float mtx[4][4];
62 |     if (threadIdx.x < 16)
63 |         mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
64 |     __syncthreads();
65 | 
66 |     if (px >= p.gridSize.x)
67 |         return;
68 | 
69 |     vec3f pos(
70 |         p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
71 |         p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
72 |         p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
73 |     );
74 | 
75 |     vec4f d_out(
76 |         p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
77 |         p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
78 |         p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
79 |         p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0))
80 |     );
81 | 
82 |     if (p.isPoints)
83 |     {
84 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
85 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
86 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
87 |     }
88 |     else
89 |     {
90 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
91 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
92 |         p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
93 |     }
94 | }


--------------------------------------------------------------------------------
/render/renderutils/c_src/mesh.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | 
14 | #include "common.h"
15 | 
16 | struct XfmKernelParams // Parameter bundle passed by value to xfmPointsFwdKernel / xfmPointsBwdKernel (mesh.cu).
17 | {
18 |     bool            isPoints; // true: the mtx[3][*] terms are applied and 4 output channels are written; false: they are skipped and 3 channels are written
19 |     Tensor          points;   // input points/vectors, 3 channels read per entry; the backward kernel writes their gradient with store_grad
20 |     Tensor          matrix;   // 4x4 matrix fetched per batch entry
21 |     Tensor          out;      // forward: transformed output; backward: incoming gradient
22 |     dim3            gridSize; // x is the number of points that thread indices are bounds-checked against
23 | };
24 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/normal.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  3 |  *
  4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  5 |  * property and proprietary rights in and to this material, related 
  6 |  * documentation and any modifications thereto. Any use, reproduction, 
  7 |  * disclosure or distribution of this material and related documentation
  8 |  * without an express license agreement from NVIDIA CORPORATION or 
  9 |  * its affiliates is strictly prohibited.
 10 |  */
 11 | 
 12 | #include "common.h"
 13 | #include "normal.h"
 14 | 
 15 | #define NORMAL_THRESHOLD 0.1f
 16 | 
 17 | //------------------------------------------------------------------------
 18 | // Perturb shading normal by tangent frame
 19 | 
 20 | // Builds the shading normal from a tangent-frame perturbation.
 21 | //
 22 | // The bitangent is the normalized cross(smooth_tng, smooth_nrm). The result is
 23 | // the normalized combination of tangent, bitangent and normal weighted by
 24 | // perturbed_nrm.x, .y and max(.z, 0). With opengl set, the bitangent term is
 25 | // negated.
 26 | __device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl)
 27 | {
 28 |     const vec3f rawBitangent = cross(smooth_tng, smooth_nrm);
 29 |     const vec3f bitangent = safeNormalize(rawBitangent);
 30 | 
 31 |     // The normal weight is clamped so the perturbation never points below the surface.
 32 |     const float normalWeight = max(perturbed_nrm.z, 0.0f);
 33 | 
 34 |     const vec3f unnormalized = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * bitangent * perturbed_nrm.y + smooth_nrm * normalWeight;
 35 |     return safeNormalize(unnormalized);
 36 | }
 27 | 
 28 | // Backward pass of fwdPerturbNormal.
 29 | //
 30 | //   perturbed_nrm, smooth_nrm, smooth_tng, opengl  same inputs as the forward pass
 31 | //   d_perturbed_nrm, d_smooth_nrm, d_smooth_tng    gradient accumulators (added to)
 32 | //   d_out                                          incoming gradient of the shading normal
 33 | __device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl)
 34 | {
 35 |     ////////////////////////////////////////////////////////////////////////
 36 |     // FWD: recompute the intermediates of the forward pass.
 37 |     const vec3f rawBitangent = cross(smooth_tng, smooth_nrm);
 38 |     const vec3f bitangent = safeNormalize(rawBitangent);
 39 |     const vec3f unnormalized = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * bitangent * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);
 40 | 
 41 |     ////////////////////////////////////////////////////////////////////////
 42 |     // BWD
 43 | 
 44 |     // Through the final normalization.
 45 |     vec3f d_unnormalized(0);
 46 |     bwdSafeNormalize(unnormalized, d_unnormalized, d_out);
 47 | 
 48 |     // Normal term: max(z, 0) passes gradient only for z > 0.
 49 |     if (perturbed_nrm.z > 0.0f)
 50 |     {
 51 |         d_smooth_nrm += d_unnormalized * perturbed_nrm.z;
 52 |         d_perturbed_nrm.z += sum(d_unnormalized * smooth_nrm);
 53 |     }
 54 | 
 55 |     // Bitangent term, with the same sign flip as in the forward pass.
 56 |     vec3f d_bitangent(0);
 57 |     d_bitangent += (opengl ? -1 : 1) * d_unnormalized * perturbed_nrm.y;
 58 |     d_perturbed_nrm.y += (opengl ? -1 : 1) * sum(d_unnormalized * bitangent);
 59 | 
 60 |     // Tangent term.
 61 |     d_smooth_tng += d_unnormalized * perturbed_nrm.x;
 62 |     d_perturbed_nrm.x += sum(d_unnormalized * smooth_tng);
 63 | 
 64 |     // Through the bitangent normalization and the cross product.
 65 |     vec3f d_rawBitangent(0);
 66 |     bwdSafeNormalize(rawBitangent, d_rawBitangent, d_bitangent);
 67 |     bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d_rawBitangent);
 68 | }
 60 | 
 61 | //------------------------------------------------------------------------
 62 | #define bent_nrm_eps 0.001f
 63 | 
 64 | // Blends between the geometric and the smooth normal depending on how much
 65 | // the smooth normal faces the viewer: t = clamp(dot(view, smooth) /
 66 | // NORMAL_THRESHOLD, 0, 1), result = geom * (1 - t) + smooth * t.
 67 | // The result is not re-normalized here.
 68 | __device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm)
 69 | {
 70 |     const float facing = dot(view_vec, smooth_nrm);
 71 |     const float blend = clamp(facing / NORMAL_THRESHOLD, 0.0f, 1.0f);
 72 |     return geom_nrm * (1.0f - blend) + smooth_nrm * blend;
 73 | }
 70 | 
 71 | // Backward pass of fwdBendNormal.
 72 | //
 73 | //   view_vec, smooth_nrm, geom_nrm          same inputs as the forward pass
 74 | //   d_view_vec, d_smooth_nrm, d_geom_nrm    gradient accumulators (added to)
 75 | //   d_out                                   incoming gradient of the bent normal
 76 | __device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out)
 77 | {
 78 |     const float dp = dot(view_vec, smooth_nrm);
 79 | 
 80 |     // Above the threshold the forward result is the smooth normal itself.
 81 |     if (dp > NORMAL_THRESHOLD)
 82 |     {
 83 |         d_smooth_nrm += d_out;
 84 |         return;
 85 |     }
 86 | 
 87 |     // Otherwise: geom_nrm * (1.0f - t) + smooth_nrm * t.
 88 |     const float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);
 89 |     d_geom_nrm   += d_out * (1.0f - t);
 90 |     d_smooth_nrm += d_out * t;
 91 | 
 92 |     // The blend factor only depends on dp where the clamp is not active.
 93 |     // dp > NORMAL_THRESHOLD was handled above, which leaves dp < 0.
 94 |     const float d_t = sum(d_out * (smooth_nrm - geom_nrm));
 95 |     const float d_dp = (dp < 0.0f) ? 0.0f : d_t / NORMAL_THRESHOLD;
 96 | 
 97 |     bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp);
 98 | }
 94 | 
 95 | //------------------------------------------------------------------------
 96 | // Kernels
 97 | 
 98 | // Forward kernel: computes the final shading normal for one pixel.
 99 | //
100 | // Normalizes the smooth normal and tangent, perturbs the normal in that
101 | // tangent frame (fwdPerturbNormal) and bends the result towards the geometric
102 | // normal (fwdBendNormal). With two_sided_shading set, both normals are negated
103 | // first when the geometric normal faces away from the viewer.
104 | __global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p)
105 | {
106 |     // Pixel handled by this thread.
107 |     const unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
108 |     const unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
109 |     const unsigned int pz = blockIdx.z;
110 |     if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
111 |         return;
112 | 
113 |     // Inputs.
114 |     const vec3f pos = p.pos.fetch3(px, py, pz);
115 |     const vec3f view_pos = p.view_pos.fetch3(px, py, pz);
116 |     const vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
117 |     const vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
118 | 
119 |     // Normalized frame and view direction.
120 |     const vec3f smooth_nrm = safeNormalize(p.smooth_nrm.fetch3(px, py, pz));
121 |     const vec3f smooth_tng = safeNormalize(p.smooth_tng.fetch3(px, py, pz));
122 |     const vec3f view_vec = safeNormalize(view_pos - pos);
123 | 
124 |     const vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);
125 | 
126 |     // Back-facing geometry is shaded with flipped normals when two-sided.
127 |     const bool flipNormals = p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f;
128 |     const vec3f res = flipNormals
129 |         ? fwdBendNormal(view_vec, -shading_nrm, -geom_nrm)
130 |         : fwdBendNormal(view_vec, shading_nrm, geom_nrm);
131 | 
132 |     p.out.store(px, py, pz, res);
133 | }
127 | 
128 | // Backward kernel of PrepareShadingNormalFwdKernel.
129 | //
130 | // Recomputes the forward intermediates for one pixel, then walks the forward
131 | // steps in reverse (bend, perturb, normalizations) and stores the gradient of
132 | // every input tensor. p.out holds the incoming gradient.
133 | __global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p)
134 | {
135 |     // Pixel handled by this thread.
136 |     const unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
137 |     const unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
138 |     const unsigned int pz = blockIdx.z;
139 |     if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
140 |         return;
141 | 
142 |     // Inputs and incoming gradient.
143 |     const vec3f pos = p.pos.fetch3(px, py, pz);
144 |     const vec3f view_pos = p.view_pos.fetch3(px, py, pz);
145 |     const vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
146 |     const vec3f rawSmoothNrm = p.smooth_nrm.fetch3(px, py, pz);
147 |     const vec3f rawSmoothTng = p.smooth_tng.fetch3(px, py, pz);
148 |     const vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
149 |     const vec3f d_out = p.out.fetch3(px, py, pz);
150 | 
151 |     ///////////////////////////////////////////////////////////////////////////////////////////////////
152 |     // FWD
153 | 
154 |     const vec3f rawViewVec = view_pos - pos;
155 |     const vec3f smooth_nrm = safeNormalize(rawSmoothNrm);
156 |     const vec3f smooth_tng = safeNormalize(rawSmoothTng);
157 |     const vec3f view_vec = safeNormalize(rawViewVec);
158 | 
159 |     const vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);
160 | 
161 |     ///////////////////////////////////////////////////////////////////////////////////////////////////
162 |     // BWD
163 | 
164 |     // Bend step. In the two-sided case the forward pass fed negated normals,
165 |     // so their gradients are negated on the way back.
166 |     vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0);
167 |     const bool flipNormals = p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f;
168 |     if (flipNormals)
169 |     {
170 |         bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
171 |         d_shading_nrm = -d_shading_nrm;
172 |         d_geom_nrm = -d_geom_nrm;
173 |     }
174 |     else
175 |     {
176 |         bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
177 |     }
178 | 
179 |     // Perturb step.
180 |     vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0);
181 |     bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl);
182 | 
183 |     // Normalizations of the view vector, smooth normal and smooth tangent.
184 |     vec3f d_rawViewVec(0), d_rawSmoothNrm(0), d_rawSmoothTng(0);
185 |     bwdSafeNormalize(rawViewVec, d_rawViewVec, d_view_vec);
186 |     bwdSafeNormalize(rawSmoothNrm, d_rawSmoothNrm, d_smooth_nrm);
187 |     bwdSafeNormalize(rawSmoothTng, d_rawSmoothTng, d_smooth_tng);
188 | 
189 |     // view vector = view_pos - pos, hence the opposite signs.
190 |     p.pos.store_grad(px, py, pz, -d_rawViewVec);
191 |     p.view_pos.store_grad(px, py, pz, d_rawViewVec);
192 |     p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm);
193 |     p.smooth_nrm.store_grad(px, py, pz, d_rawSmoothNrm);
194 |     p.smooth_tng.store_grad(px, py, pz, d_rawSmoothTng);
195 |     p.geom_nrm.store_grad(px, py, pz, d_geom_nrm);
196 | }


--------------------------------------------------------------------------------
/render/renderutils/c_src/normal.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | 
14 | #include "common.h"
15 | 
16 | struct PrepareShadingNormalKernelParams // Parameter bundle passed by value to PrepareShadingNormalFwdKernel / PrepareShadingNormalBwdKernel (normal.cu).
17 | {
18 |     Tensor  pos;           // surface position per pixel; the view vector is computed as view_pos - pos
19 |     Tensor  view_pos;      // viewer position per pixel
20 |     Tensor  perturbed_nrm; // tangent-frame perturbation; x/y/z weight the tangent, bitangent and normal
21 |     Tensor  smooth_nrm;    // smooth normal; normalized inside the kernels
22 |     Tensor  smooth_tng;    // smooth tangent; normalized inside the kernels
23 |     Tensor  geom_nrm;      // geometric normal; used as read, without normalization in the kernels
24 |     Tensor  out;           // forward: resulting shading normal; backward: incoming gradient
25 |     dim3    gridSize;      // pixel-domain extent that thread coordinates are bounds-checked against
26 |     bool    two_sided_shading, opengl; // two_sided_shading: flip both normals when the geometric normal faces away from the viewer; opengl: negate the bitangent term of the perturbation
27 | };
28 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/tensor.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once
13 | #if defined(__CUDACC__) && defined(BFLOAT16)
14 | #include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits
15 | #endif
16 | 
17 | //---------------------------------------------------------------------------------
18 | // CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16
19 | 
// Plain descriptor for a tensor with up to four axes that is handed to CUDA
// kernels. It carries raw pointers to the value and gradient buffers together
// with the extents/strides needed to turn an (n, h, w, c) coordinate into a
// flat element offset. The struct owns nothing; it never allocates or frees.
struct Tensor
{
    // Element buffer read by fetch() and written by store().
    void*   val;
    // Gradient buffer written by store_grad().
    void*   d_val;
    // dims:  extents compared against 1 in nhwcIndex() to implement broadcasting.
    // _dims: extents used by nhwcIndexContinuous() to address a densely packed layout.
    int     dims[4], _dims[4];
    // Per-axis element strides used by _nhwcIndex() and nhwcIndex().
    int     strides[4];
    // Only consulted when BFLOAT16 is defined: true selects __nv_bfloat16
    // storage, false selects float. Without BFLOAT16 the buffers are always
    // treated as float and this flag is ignored.
    bool    fp16;

    // Zero-initialising default constructor, compiled only when __CUDA__ is
    // defined and __CUDA_ARCH__ is not. Members are initialised in declaration
    // order regardless of the order written in the initializer list.
    // NOTE(review): nvcc is documented to define __CUDACC__ rather than
    // __CUDA__ -- confirm this guard is ever true for the toolchain in use.
#if defined(__CUDA__) && !defined(__CUDA_ARCH__)
    Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {}
#endif

#ifdef __CUDACC__
    // Helpers to index and read/write a single element
    // _nhwcIndex:          strided offset, no broadcasting.
    // nhwcIndex:           strided offset where any axis with dims[i] == 1 contributes 0 (broadcast).
    // nhwcIndexContinuous: offset into a densely packed NHWC layout described by _dims (strides are not used).
    __device__ inline int   _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; }
    __device__ inline int   nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); }
    __device__ inline int   nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; }
#ifdef BFLOAT16
    // Flat-offset accessors. Values are always exchanged as float; when fp16
    // is set they are converted to/from __nv_bfloat16 at the buffer boundary.
    __device__ inline float fetch(unsigned int idx) const { return fp16 ? __bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; }
    __device__ inline void  store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; }
    __device__ inline void  store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; }
#else
    // Flat-offset accessors for float-only builds (fp16 is not consulted).
    __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; }
    __device__ inline void  store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; }
    __device__ inline void  store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; }
#endif

    //////////////////////////////////////////////////////////////////////////////////////////
    // Fetch, use broadcasting for tensor dimensions of size 1
    // Note the argument order: (x, y, z) is forwarded as (n = z, h = y, w = x).
    __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const
    {
        return fetch(nhwcIndex(z, y, x, 0));
    }

    // Reads channels 0..2 at (x, y, z) and packs them into a vec3f.
    __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const
    {
        return vec3f(
            fetch(nhwcIndex(z, y, x, 0)),
            fetch(nhwcIndex(z, y, x, 1)),
            fetch(nhwcIndex(z, y, x, 2))
        );
    }

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
    // Writes go through the strided (non-broadcast) index into `val`.
    __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val)
    {
        store(_nhwcIndex(z, y, x, 0), _val);
    }

    // Three-channel variant: writes _val.x/.y/.z to channels 0/1/2.
    __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
    {
        store(_nhwcIndex(z, y, x, 0), _val.x);
        store(_nhwcIndex(z, y, x, 1), _val.y);
        store(_nhwcIndex(z, y, x, 2), _val.z);
    }

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
    // Unlike store(), gradient writes use the densely packed index (built from _dims) into `d_val`.
    __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val)
    {
        store_grad(nhwcIndexContinuous(z, y, x, 0), _val);
    }

    // Three-channel variant: writes _val.x/.y/.z to gradient channels 0/1/2.
    __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
    {
        store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x);
        store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y);
        store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z);
    }
#endif

};
93 | 


--------------------------------------------------------------------------------
/render/renderutils/c_src/vec3f.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  3 |  *
  4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
  5 |  * property and proprietary rights in and to this material, related 
  6 |  * documentation and any modifications thereto. Any use, reproduction, 
  7 |  * disclosure or distribution of this material and related documentation
  8 |  * without an express license agreement from NVIDIA CORPORATION or 
  9 |  * its affiliates is strictly prohibited.
 10 |  */
 11 | 
 12 | #pragma once 
 13 | 
 14 | struct vec3f
 15 | {
 16 |     float x, y, z;
 17 | 
 18 | #ifdef __CUDACC__
 19 |     __device__ vec3f() { }
 20 |     __device__ vec3f(float v) { x = v; y = v; z = v; }
 21 |     __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; }
 22 |     __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; }
 23 | 
 24 |     __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; }
 25 |     __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; }
 26 |     __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; }
 27 |     __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; }
 28 | #endif
 29 | };
 30 | 
 31 | #ifdef __CUDACC__
 32 | __device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); }
 33 | __device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); }
 34 | __device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); }
 35 | __device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); }
 36 | __device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); }
 37 | 
 38 | __device__ static inline float sum(vec3f a)
 39 | {
 40 |     return a.x + a.y + a.z;
 41 | }
 42 | 
 43 | __device__ static inline vec3f cross(vec3f a, vec3f b)
 44 | {
 45 |     vec3f out;
 46 |     out.x = a.y * b.z - a.z * b.y;
 47 |     out.y = a.z * b.x - a.x * b.z;
 48 |     out.z = a.x * b.y - a.y * b.x;
 49 |     return out;
 50 | }
 51 | 
// Backward pass of cross(a, b).
//   a, b     : the inputs of the forward cross product
//   d_out    : gradient flowing in from the output
//   d_a, d_b : gradient accumulators; the contributions are ADDED to whatever
//              they already hold, so callers must initialise them.
// Component-wise this adds cross(b, d_out) to d_a and cross(d_out, a) to d_b.
__device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out)
{
    d_a.x += d_out.z * b.y - d_out.y * b.z;
    d_a.y += d_out.x * b.z - d_out.z * b.x;
    d_a.z += d_out.y * b.x - d_out.x * b.y;

    d_b.x += d_out.y * a.z - d_out.z * a.y;
    d_b.y += d_out.z * a.x - d_out.x * a.z;
    d_b.z += d_out.x * a.y - d_out.y * a.x;
}
 62 | 
// Dot product of a and b: the sum of the three component-wise products.
__device__ static inline float dot(vec3f a, vec3f b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}
 67 | 
 68 | __device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out)
 69 | {
 70 |     d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z;
 71 |     d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z;
 72 | }
 73 | 
 74 | __device__ static inline vec3f reflect(vec3f x, vec3f n)
 75 | {
 76 |     return n * 2.0f * dot(n, x) - x;
 77 | }
 78 | 
// Backward pass of reflect(x, n) = 2 * n * dot(n, x) - x.
//   x, n     : the inputs of the forward call
//   d_out    : gradient flowing in from the output
//   d_x, d_n : gradient accumulators (contributions are added with +=)
__device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out)
{
    // d out_i / d x_j = 2 * n_i * n_j - delta_ij
    d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z);
    d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z);
    d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1);

    // d out_i / d n_j = 2 * delta_ij * dot(n, x) + 2 * n_i * x_j
    d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x);
    d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y);
    d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z));
}
 89 | 
 90 | __device__ static inline vec3f safeNormalize(vec3f v)
 91 | {
 92 |     float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
 93 |     return l > 0.0f ? (v / l) : vec3f(0.0f);
 94 | }
 95 | 
// Backward pass of safeNormalize(v).
//   v     : the input of the forward call
//   d_out : gradient flowing in from the output
//   d_v   : gradient accumulator (contributions are added with +=)
// When the length of v is not strictly positive nothing is accumulated,
// mirroring the zero-vector branch of the forward function.
__device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out)
{

    float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    if (l > 0.0f)
    {
        // fac = 1 / |v|^3, the common factor of the normalisation Jacobian.
        // NOTE(review): the literal 1.0 is a double, so this division is
        // carried out in double precision before narrowing to float.
        float fac = 1.0 / powf(v.x * v.x + v.y * v.y + v.z * v.z, 1.5f);
        d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac;
        d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac;
        d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac;
    }
}
108 | 
109 | #endif


--------------------------------------------------------------------------------
/render/renderutils/c_src/vec4f.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 3 |  *
 4 |  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 5 |  * property and proprietary rights in and to this material, related 
 6 |  * documentation and any modifications thereto. Any use, reproduction, 
 7 |  * disclosure or distribution of this material and related documentation
 8 |  * without an express license agreement from NVIDIA CORPORATION or 
 9 |  * its affiliates is strictly prohibited.
10 |  */
11 | 
12 | #pragma once 
13 | 
// Four-component float vector. Constructors exist only when compiled as CUDA
// (__CUDACC__); otherwise the struct is a plain aggregate of four floats.
struct vec4f
{
    float x, y, z, w;

#ifdef __CUDACC__
    // Leaves the components uninitialised.
    __device__ vec4f() { }
    // Broadcasts one scalar to all components (intentionally implicit).
    __device__ vec4f(float v) : x(v), y(v), z(v), w(v) { }
    // Component-wise construction.
    __device__ vec4f(float _x, float _y, float _z, float _w) : x(_x), y(_y), z(_z), w(_w) { }
    // Conversion from the CUDA built-in float4.
    __device__ vec4f(float4 v) : x(v.x), y(v.y), z(v.z), w(v.w) { }
#endif
};
25 | 
26 | 


--------------------------------------------------------------------------------
/render/renderutils/loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import torch
11 | 
12 | #----------------------------------------------------------------------------
13 | # HDR image losses
14 | #----------------------------------------------------------------------------
15 | 
16 | def _tonemap_srgb(f, exposure=5):
17 |     f = f * exposure
18 |     return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)
19 | 
20 | def _SMAPE(img, target, eps=0.01):
21 |     nom = torch.abs(img - target)
22 |     denom = torch.abs(img) + torch.abs(target) + 0.01
23 |     return torch.mean(nom / denom)
24 | 
25 | def _RELMSE(img, target, eps=0.1):
26 |     nom = (img - target) * (img - target)
27 |     denom = img * img + target * target + 0.1 
28 |     return torch.mean(nom / denom)
29 | 
def image_loss_fn(img, target, loss, tonemapper):
    """Compute an image loss, optionally after log + sRGB tone mapping.

    Args:
        img: Predicted image tensor.
        target: Reference image tensor.
        loss: ``'mse'``, ``'smape'`` or ``'relmse'``; any other value selects
            the L1 loss.
        tonemapper: ``'log_srgb'`` clamps both images to [0, 65535], applies
            ``log(x + 1)`` and the sRGB curve before the loss; any other
            value leaves the images untouched.

    Returns:
        A scalar tensor holding the loss.
    """
    if tonemapper == 'log_srgb':
        img, target = (
            _tonemap_srgb(torch.log(torch.clamp(image, min=0, max=65535) + 1))
            for image in (img, target)
        )

    if loss == 'mse':
        return torch.nn.functional.mse_loss(img, target)
    if loss == 'smape':
        return _SMAPE(img, target)
    if loss == 'relmse':
        return _RELMSE(img, target)
    # Everything else (including 'l1') falls back to the L1 loss.
    return torch.nn.functional.l1_loss(img, target)
43 | 


--------------------------------------------------------------------------------
/render/renderutils/tests/test_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import torch
11 | 
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 | 
17 | RES = 8
18 | DTYPE = torch.float32
19 | 
def tonemap_srgb(f):
    """Apply the sRGB transfer curve element-wise to ``f``.

    Values above 0.0031308 use the gamma segment, all others the linear one.
    """
    above_knee = f > 0.0031308
    gamma_segment = torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055
    linear_segment = 12.92*f
    return torch.where(above_knee, gamma_segment, linear_segment)
22 | 
def l1(output, target):
    """Return the L1 loss between two images after log + sRGB tone mapping.

    Both inputs are clamped to [0, 65535], passed through ``log(x + 1)`` and
    ``tonemap_srgb`` before being compared.
    """
    mapped_output, mapped_target = (
        tonemap_srgb(torch.log(torch.clamp(image, min=0, max=65535) + 1))
        for image in (output, target)
    )
    return torch.nn.functional.l1_loss(mapped_output, mapped_target)
29 | 
def relative_loss(name, ref, cuda):
    """Print ``name`` followed by the largest relative deviation of ``cuda`` from ``ref``.

    Both tensors are converted to float32 first; the denominator is
    ``|ref + 1e-7|``.
    """
    ref = ref.float()
    cuda = cuda.float()
    abs_diff = torch.abs(ref - cuda)
    scale = torch.abs(ref + 1e-7)
    worst = torch.max(abs_diff / scale)
    print(name, worst.item())
34 | 
def test_loss(loss, tonemapper):
    """Compare the Python and CUDA paths of ``ru.image_loss`` on random images.

    Prints the relative deviation of the loss value and of the gradients
    with respect to both the image and the target.
    """
    shape = (1, RES, RES, 3)

    # Random inputs for the CUDA path, plus detached copies for the reference
    # path so each side accumulates its own gradients.
    img_cuda = torch.rand(*shape, dtype=DTYPE, device='cuda', requires_grad=True)
    target_cuda = torch.rand(*shape, dtype=DTYPE, device='cuda', requires_grad=True)
    img_ref = img_cuda.clone().detach().requires_grad_(True)
    target_ref = target_cuda.clone().detach().requires_grad_(True)

    ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True)
    ref_loss.backward()

    cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper)
    cuda_loss.backward()

    rule = "-------------------------------------------------------------"
    print(rule)
    print("    Loss: %s, %s" % (loss, tonemapper))
    print(rule)

    comparisons = (
        ("res:", ref_loss, cuda_loss),
        ("img:", img_ref.grad, img_cuda.grad),
        ("target:", target_ref.grad, target_cuda.grad),
    )
    for label, reference, candidate in comparisons:
        relative_loss(label, reference, candidate)
54 | 
55 | 
56 | test_loss('l1', 'none')
57 | test_loss('l1', 'log_srgb')
58 | test_loss('mse', 'log_srgb')
59 | test_loss('smape', 'none')
60 | test_loss('relmse', 'none')
61 | test_loss('mse', 'none')


--------------------------------------------------------------------------------
/render/renderutils/tests/test_mesh.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import torch
11 | 
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 | 
17 | BATCH = 8
18 | RES = 1024
19 | DTYPE = torch.float32
20 | 
21 | torch.manual_seed(0)
22 | 
def tonemap_srgb(f):
    """Apply the sRGB transfer curve element-wise to ``f``.

    Values above 0.0031308 use the gamma segment, all others the linear one.
    """
    above_knee = f > 0.0031308
    gamma_segment = torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055
    linear_segment = 12.92*f
    return torch.where(above_knee, gamma_segment, linear_segment)
25 | 
def l1(output, target):
    """Return the L1 loss between two images after log + sRGB tone mapping.

    Both inputs are clamped to [0, 65535], passed through ``log(x + 1)`` and
    ``tonemap_srgb`` before being compared.
    """
    mapped_output, mapped_target = (
        tonemap_srgb(torch.log(torch.clamp(image, min=0, max=65535) + 1))
        for image in (output, target)
    )
    return torch.nn.functional.l1_loss(mapped_output, mapped_target)
32 | 
def relative_loss(name, ref, cuda):
    """Print ``name`` followed by the largest relative deviation of ``cuda`` from ``ref``.

    Both tensors are converted to float32 first. A small epsilon is added to
    ``|ref|`` so that exact zeros in the reference no longer turn the report
    into ``nan`` (0/0) or ``inf`` (x/0).

    Args:
        name: Label printed in front of the value.
        ref: Reference tensor.
        cuda: Tensor to compare against the reference.
    """
    ref = ref.float()
    cuda = cuda.float()
    # Guard the denominator; without it a single zero in ``ref`` poisons the max.
    denom = torch.abs(ref) + 1e-7
    print(name, torch.max(torch.abs(ref - cuda) / denom).item())
37 | 
def test_xfm_points():
    """Compare the Python and CUDA paths of ``ru.xfm_points`` on random data.

    Prints the relative deviation of the transformed points and of the
    gradient with respect to the input points.
    """
    # Random inputs; creation order is kept so the seeded RNG stream matches.
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    # Detached copies feed the reference path so gradients stay separate.
    points_ref = points_cuda.clone().detach().requires_grad_(True)
    mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)

    mse = torch.nn.MSELoss()

    ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True)
    mse(ref_out, target).backward()

    cuda_out = ru.xfm_points(points_cuda, mtx_cuda)
    mse(cuda_out, target).backward()

    print("-------------------------------------------------------------")

    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)
57 | 
def test_xfm_vectors():
    """Compare the Python and CUDA paths of ``ru.xfm_vectors`` and ``ru.xfm_points``.

    The same random points are pushed through both transforms; the relative
    deviation of the vector result and of both input gradients is printed.
    """
    def detached_leaf(tensor):
        # Independent leaf copy so each path accumulates its own gradient.
        return tensor.clone().detach().requires_grad_(True)

    # Random inputs; creation order is kept so the seeded RNG stream matches.
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    points_ref = detached_leaf(points_cuda)
    points_cuda_p = detached_leaf(points_cuda)
    points_ref_p = detached_leaf(points_cuda)
    mtx_ref = detached_leaf(mtx_cuda)

    mse = torch.nn.MSELoss()

    # Vector transform: only the first three target channels are compared.
    ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True)
    mse(ref_out, target[..., 0:3]).backward()

    cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda)
    mse(cuda_out, target[..., 0:3]).backward()

    # Point transform on separate leaf copies of the same points.
    ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True)
    mse(ref_out_p, target).backward()

    cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda)
    mse(cuda_out_p, target).backward()

    print("-------------------------------------------------------------")

    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)
    relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad)
88 | 
89 | test_xfm_points()
90 | test_xfm_vectors()
91 | 


--------------------------------------------------------------------------------
/render/renderutils/tests/test_perf.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 2 | #
 3 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 4 | # property and proprietary rights in and to this material, related
 5 | # documentation and any modifications thereto. Any use, reproduction, 
 6 | # disclosure or distribution of this material and related documentation 
 7 | # without an express license agreement from NVIDIA CORPORATION or 
 8 | # its affiliates is strictly prohibited.
 9 | 
10 | import torch
11 | 
12 | import os
13 | import sys
14 | sys.path.insert(0, os.path.join(sys.path[0], '../..'))
15 | import renderutils as ru
16 | 
17 | DTYPE=torch.float32
18 | 
def test_bsdf(BATCH, RES, ITR):
    """Time ``ITR`` calls of ``ru.pbr_bsdf`` for the Python and CUDA paths.

    Inputs are random ``(BATCH, RES, RES, 3)`` tensors on the GPU. One CUDA
    call is issued before timing starts. Elapsed times are printed in the
    unit reported by ``torch.cuda.Event.elapsed_time``.
    """
    shape = (BATCH, RES, RES, 3)

    def make_pair():
        # A random leaf for the CUDA path and a detached leaf copy for the
        # Python reference path.
        on_cuda = torch.rand(*shape, dtype=DTYPE, device='cuda', requires_grad=True)
        return on_cuda, on_cuda.clone().detach().requires_grad_(True)

    kd_cuda, kd_ref = make_pair()
    arm_cuda, arm_ref = make_pair()
    pos_cuda, pos_ref = make_pair()
    nrm_cuda, nrm_ref = make_pair()
    view_cuda, view_ref = make_pair()
    light_cuda, light_ref = make_pair()
    target = torch.rand(*shape, device='cuda')

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Untimed call ahead of the measurements.
    ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)

    print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES))

    start.record()
    for _ in range(ITR):
        ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True)
    end.record()
    torch.cuda.synchronize()
    print("Pbr BSDF python:", start.elapsed_time(end))

    start.record()
    for _ in range(ITR):
        cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)
    end.record()
    torch.cuda.synchronize()
    print("Pbr BSDF cuda:", start.elapsed_time(end))
54 | 
55 | test_bsdf(1, 512, 1000)
56 | test_bsdf(16, 512, 1000)
57 | test_bsdf(1, 2048, 1000)
58 | 


--------------------------------------------------------------------------------